x86/smpboot: Init apic mapping before usage
[cascardo/linux.git] / lib / lz4 / lz4hc_compress.c
1 /*
2  * LZ4 HC - High Compression Mode of LZ4
3  * Copyright (C) 2011-2012, Yann Collet.
4  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following disclaimer
14  * in the documentation and/or other materials provided with the
15  * distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * You can contact the author at :
30  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31  * - LZ4 source repository : http://code.google.com/p/lz4/
32  *
33  *  Changed for kernel use by:
34  *  Chanho Min <chanho.min@lge.com>
35  */
36
37 #include <linux/module.h>
38 #include <linux/kernel.h>
39 #include <linux/lz4.h>
40 #include <asm/unaligned.h>
41 #include "lz4defs.h"
42
43 struct lz4hc_data {
44         const u8 *base;
45         HTYPE hashtable[HASHTABLESIZE];
46         u16 chaintable[MAXD];
47         const u8 *nexttoupdate;
48 } __attribute__((__packed__));
49
50 static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
51 {
52         memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
53         memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
54
55 #if LZ4_ARCH64
56         hc4->nexttoupdate = base + 1;
57 #else
58         hc4->nexttoupdate = base;
59 #endif
60         hc4->base = base;
61         return 1;
62 }
63
64 /* Update chains up to ip (excluded) */
65 static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
66 {
67         u16 *chaintable = hc4->chaintable;
68         HTYPE *hashtable  = hc4->hashtable;
69 #if LZ4_ARCH64
70         const BYTE * const base = hc4->base;
71 #else
72         const int base = 0;
73 #endif
74
75         while (hc4->nexttoupdate < ip) {
76                 const u8 *p = hc4->nexttoupdate;
77                 size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
78                 if (delta > MAX_DISTANCE)
79                         delta = MAX_DISTANCE;
80                 chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
81                 hashtable[HASH_VALUE(p)] = (p) - base;
82                 hc4->nexttoupdate++;
83         }
84 }
85
86 static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
87                 const u8 *const matchlimit)
88 {
89         const u8 *p1t = p1;
90
91         while (p1t < matchlimit - (STEPSIZE - 1)) {
92 #if LZ4_ARCH64
93                 u64 diff = A64(p2) ^ A64(p1t);
94 #else
95                 u32 diff = A32(p2) ^ A32(p1t);
96 #endif
97                 if (!diff) {
98                         p1t += STEPSIZE;
99                         p2 += STEPSIZE;
100                         continue;
101                 }
102                 p1t += LZ4_NBCOMMONBYTES(diff);
103                 return p1t - p1;
104         }
105 #if LZ4_ARCH64
106         if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
107                 p1t += 4;
108                 p2 += 4;
109         }
110 #endif
111
112         if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
113                 p1t += 2;
114                 p2 += 2;
115         }
116         if ((p1t < matchlimit) && (*p2 == *p1t))
117                 p1t++;
118         return p1t - p1;
119 }
120
121 static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
122                 const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
123 {
124         u16 *const chaintable = hc4->chaintable;
125         HTYPE *const hashtable = hc4->hashtable;
126         const u8 *ref;
127 #if LZ4_ARCH64
128         const BYTE * const base = hc4->base;
129 #else
130         const int base = 0;
131 #endif
132         int nbattempts = MAX_NB_ATTEMPTS;
133         size_t repl = 0, ml = 0;
134         u16 delta;
135
136         /* HC4 match finder */
137         lz4hc_insert(hc4, ip);
138         ref = hashtable[HASH_VALUE(ip)] + base;
139
140         /* potential repetition */
141         if (ref >= ip-4) {
142                 /* confirmed */
143                 if (A32(ref) == A32(ip)) {
144                         delta = (u16)(ip-ref);
145                         repl = ml  = lz4hc_commonlength(ip + MINMATCH,
146                                         ref + MINMATCH, matchlimit) + MINMATCH;
147                         *matchpos = ref;
148                 }
149                 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
150         }
151
152         while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
153                 nbattempts--;
154                 if (*(ref + ml) == *(ip + ml)) {
155                         if (A32(ref) == A32(ip)) {
156                                 size_t mlt =
157                                         lz4hc_commonlength(ip + MINMATCH,
158                                         ref + MINMATCH, matchlimit) + MINMATCH;
159                                 if (mlt > ml) {
160                                         ml = mlt;
161                                         *matchpos = ref;
162                                 }
163                         }
164                 }
165                 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
166         }
167
168         /* Complete table */
169         if (repl) {
170                 const BYTE *ptr = ip;
171                 const BYTE *end;
172                 end = ip + repl - (MINMATCH-1);
173                 /* Pre-Load */
174                 while (ptr < end - delta) {
175                         chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
176                         ptr++;
177                 }
178                 do {
179                         chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
180                         /* Head of chain */
181                         hashtable[HASH_VALUE(ptr)] = (ptr) - base;
182                         ptr++;
183                 } while (ptr < end);
184                 hc4->nexttoupdate = end;
185         }
186
187         return (int)ml;
188 }
189
190 static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
191         const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
192         const u8 **matchpos, const u8 **startpos)
193 {
194         u16 *const chaintable = hc4->chaintable;
195         HTYPE *const hashtable = hc4->hashtable;
196 #if LZ4_ARCH64
197         const BYTE * const base = hc4->base;
198 #else
199         const int base = 0;
200 #endif
201         const u8 *ref;
202         int nbattempts = MAX_NB_ATTEMPTS;
203         int delta = (int)(ip - startlimit);
204
205         /* First Match */
206         lz4hc_insert(hc4, ip);
207         ref = hashtable[HASH_VALUE(ip)] + base;
208
209         while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
210                 && (nbattempts)) {
211                 nbattempts--;
212                 if (*(startlimit + longest) == *(ref - delta + longest)) {
213                         if (A32(ref) == A32(ip)) {
214                                 const u8 *reft = ref + MINMATCH;
215                                 const u8 *ipt = ip + MINMATCH;
216                                 const u8 *startt = ip;
217
218                                 while (ipt < matchlimit-(STEPSIZE - 1)) {
219                                         #if LZ4_ARCH64
220                                         u64 diff = A64(reft) ^ A64(ipt);
221                                         #else
222                                         u32 diff = A32(reft) ^ A32(ipt);
223                                         #endif
224
225                                         if (!diff) {
226                                                 ipt += STEPSIZE;
227                                                 reft += STEPSIZE;
228                                                 continue;
229                                         }
230                                         ipt += LZ4_NBCOMMONBYTES(diff);
231                                         goto _endcount;
232                                 }
233                                 #if LZ4_ARCH64
234                                 if ((ipt < (matchlimit - 3))
235                                         && (A32(reft) == A32(ipt))) {
236                                         ipt += 4;
237                                         reft += 4;
238                                 }
239                                 ipt += 2;
240                                 #endif
241                                 if ((ipt < (matchlimit - 1))
242                                         && (A16(reft) == A16(ipt))) {
243                                         reft += 2;
244                                 }
245                                 if ((ipt < matchlimit) && (*reft == *ipt))
246                                         ipt++;
247 _endcount:
248                                 reft = ref;
249
250                                 while ((startt > startlimit)
251                                         && (reft > hc4->base)
252                                         && (startt[-1] == reft[-1])) {
253                                         startt--;
254                                         reft--;
255                                 }
256
257                                 if ((ipt - startt) > longest) {
258                                         longest = (int)(ipt - startt);
259                                         *matchpos = reft;
260                                         *startpos = startt;
261                                 }
262                         }
263                 }
264                 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
265         }
266         return longest;
267 }
268
269 static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
270                 int ml, const u8 *ref)
271 {
272         int length, len;
273         u8 *token;
274
275         /* Encode Literal length */
276         length = (int)(*ip - *anchor);
277         token = (*op)++;
278         if (length >= (int)RUN_MASK) {
279                 *token = (RUN_MASK << ML_BITS);
280                 len = length - RUN_MASK;
281                 for (; len > 254 ; len -= 255)
282                         *(*op)++ = 255;
283                 *(*op)++ = (u8)len;
284         } else
285                 *token = (length << ML_BITS);
286
287         /* Copy Literals */
288         LZ4_BLINDCOPY(*anchor, *op, length);
289
290         /* Encode Offset */
291         LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
292
293         /* Encode MatchLength */
294         len = (int)(ml - MINMATCH);
295         if (len >= (int)ML_MASK) {
296                 *token += ML_MASK;
297                 len -= ML_MASK;
298                 for (; len > 509 ; len -= 510) {
299                         *(*op)++ = 255;
300                         *(*op)++ = 255;
301                 }
302                 if (len > 254) {
303                         len -= 255;
304                         *(*op)++ = 255;
305                 }
306                 *(*op)++ = (u8)len;
307         } else
308                 *token += len;
309
310         /* Prepare next loop */
311         *ip += ml;
312         *anchor = *ip;
313
314         return 0;
315 }
316
317 static int lz4_compresshcctx(struct lz4hc_data *ctx,
318                 const char *source,
319                 char *dest,
320                 int isize)
321 {
322         const u8 *ip = (const u8 *)source;
323         const u8 *anchor = ip;
324         const u8 *const iend = ip + isize;
325         const u8 *const mflimit = iend - MFLIMIT;
326         const u8 *const matchlimit = (iend - LASTLITERALS);
327
328         u8 *op = (u8 *)dest;
329
330         int ml, ml2, ml3, ml0;
331         const u8 *ref = NULL;
332         const u8 *start2 = NULL;
333         const u8 *ref2 = NULL;
334         const u8 *start3 = NULL;
335         const u8 *ref3 = NULL;
336         const u8 *start0;
337         const u8 *ref0;
338         int lastrun;
339
340         ip++;
341
342         /* Main Loop */
343         while (ip < mflimit) {
344                 ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
345                 if (!ml) {
346                         ip++;
347                         continue;
348                 }
349
350                 /* saved, in case we would skip too much */
351                 start0 = ip;
352                 ref0 = ref;
353                 ml0 = ml;
354 _search2:
355                 if (ip+ml < mflimit)
356                         ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
357                                 ip + 1, matchlimit, ml, &ref2, &start2);
358                 else
359                         ml2 = ml;
360                 /* No better match */
361                 if (ml2 == ml) {
362                         lz4_encodesequence(&ip, &op, &anchor, ml, ref);
363                         continue;
364                 }
365
366                 if (start0 < ip) {
367                         /* empirical */
368                         if (start2 < ip + ml0) {
369                                 ip = start0;
370                                 ref = ref0;
371                                 ml = ml0;
372                         }
373                 }
374                 /*
375                  * Here, start0==ip
376                  * First Match too small : removed
377                  */
378                 if ((start2 - ip) < 3) {
379                         ml = ml2;
380                         ip = start2;
381                         ref = ref2;
382                         goto _search2;
383                 }
384
385 _search3:
386                 /*
387                  * Currently we have :
388                  * ml2 > ml1, and
389                  * ip1+3 <= ip2 (usually < ip1+ml1)
390                  */
391                 if ((start2 - ip) < OPTIMAL_ML) {
392                         int correction;
393                         int new_ml = ml;
394                         if (new_ml > OPTIMAL_ML)
395                                 new_ml = OPTIMAL_ML;
396                         if (ip + new_ml > start2 + ml2 - MINMATCH)
397                                 new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
398                         correction = new_ml - (int)(start2 - ip);
399                         if (correction > 0) {
400                                 start2 += correction;
401                                 ref2 += correction;
402                                 ml2 -= correction;
403                         }
404                 }
405                 /*
406                  * Now, we have start2 = ip+new_ml,
407                  * with new_ml=min(ml, OPTIMAL_ML=18)
408                  */
409                 if (start2 + ml2 < mflimit)
410                         ml3 = lz4hc_insertandgetwidermatch(ctx,
411                                 start2 + ml2 - 3, start2, matchlimit,
412                                 ml2, &ref3, &start3);
413                 else
414                         ml3 = ml2;
415
416                 /* No better match : 2 sequences to encode */
417                 if (ml3 == ml2) {
418                         /* ip & ref are known; Now for ml */
419                         if (start2 < ip+ml)
420                                 ml = (int)(start2 - ip);
421
422                         /* Now, encode 2 sequences */
423                         lz4_encodesequence(&ip, &op, &anchor, ml, ref);
424                         ip = start2;
425                         lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
426                         continue;
427                 }
428
429                 /* Not enough space for match 2 : remove it */
430                 if (start3 < ip + ml + 3) {
431                         /*
432                          * can write Seq1 immediately ==> Seq2 is removed,
433                          * so Seq3 becomes Seq1
434                          */
435                         if (start3 >= (ip + ml)) {
436                                 if (start2 < ip + ml) {
437                                         int correction =
438                                                 (int)(ip + ml - start2);
439                                         start2 += correction;
440                                         ref2 += correction;
441                                         ml2 -= correction;
442                                         if (ml2 < MINMATCH) {
443                                                 start2 = start3;
444                                                 ref2 = ref3;
445                                                 ml2 = ml3;
446                                         }
447                                 }
448
449                                 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
450                                 ip  = start3;
451                                 ref = ref3;
452                                 ml  = ml3;
453
454                                 start0 = start2;
455                                 ref0 = ref2;
456                                 ml0 = ml2;
457                                 goto _search2;
458                         }
459
460                         start2 = start3;
461                         ref2 = ref3;
462                         ml2 = ml3;
463                         goto _search3;
464                 }
465
466                 /*
467                  * OK, now we have 3 ascending matches; let's write at least
468                  * the first one ip & ref are known; Now for ml
469                  */
470                 if (start2 < ip + ml) {
471                         if ((start2 - ip) < (int)ML_MASK) {
472                                 int correction;
473                                 if (ml > OPTIMAL_ML)
474                                         ml = OPTIMAL_ML;
475                                 if (ip + ml > start2 + ml2 - MINMATCH)
476                                         ml = (int)(start2 - ip) + ml2
477                                                 - MINMATCH;
478                                 correction = ml - (int)(start2 - ip);
479                                 if (correction > 0) {
480                                         start2 += correction;
481                                         ref2 += correction;
482                                         ml2 -= correction;
483                                 }
484                         } else
485                                 ml = (int)(start2 - ip);
486                 }
487                 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
488
489                 ip = start2;
490                 ref = ref2;
491                 ml = ml2;
492
493                 start2 = start3;
494                 ref2 = ref3;
495                 ml2 = ml3;
496
497                 goto _search3;
498         }
499
500         /* Encode Last Literals */
501         lastrun = (int)(iend - anchor);
502         if (lastrun >= (int)RUN_MASK) {
503                 *op++ = (RUN_MASK << ML_BITS);
504                 lastrun -= RUN_MASK;
505                 for (; lastrun > 254 ; lastrun -= 255)
506                         *op++ = 255;
507                 *op++ = (u8) lastrun;
508         } else
509                 *op++ = (lastrun << ML_BITS);
510         memcpy(op, anchor, iend - anchor);
511         op += iend - anchor;
512         /* End */
513         return (int) (((char *)op) - dest);
514 }
515
516 int lz4hc_compress(const unsigned char *src, size_t src_len,
517                         unsigned char *dst, size_t *dst_len, void *wrkmem)
518 {
519         int ret = -1;
520         int out_len = 0;
521
522         struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
523         lz4hc_init(hc4, (const u8 *)src);
524         out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
525                 (char *)dst, (int)src_len);
526
527         if (out_len < 0)
528                 goto exit;
529
530         *dst_len = out_len;
531         return 0;
532
533 exit:
534         return ret;
535 }
536 EXPORT_SYMBOL(lz4hc_compress);
537
538 MODULE_LICENSE("Dual BSD/GPL");
539 MODULE_DESCRIPTION("LZ4HC compressor");