ゴミ箱
inflate_stream.hpp
Go to the documentation of this file.
1 //
2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9 // This is a derivative work based on Zlib, copyright below:
10 /*
11  Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
12 
13  This software is provided 'as-is', without any express or implied
14  warranty. In no event will the authors be held liable for any damages
15  arising from the use of this software.
16 
17  Permission is granted to anyone to use this software for any purpose,
18  including commercial applications, and to alter it and redistribute it
19  freely, subject to the following restrictions:
20 
21  1. The origin of this software must not be misrepresented; you must not
22  claim that you wrote the original software. If you use this software
23  in a product, an acknowledgment in the product documentation would be
24  appreciated but is not required.
25  2. Altered source versions must be plainly marked as such, and must not be
26  misrepresented as being the original software.
27  3. This notice may not be removed or altered from any source distribution.
28 
29  Jean-loup Gailly Mark Adler
30  jloup@gzip.org madler@alumni.caltech.edu
31 
32  The data format used by the zlib library is described by RFCs (Request for
33  Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
34  (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
35 */
36 
37 #ifndef BOOST_BEAST_ZLIB_DETAIL_INFLATE_STREAM_HPP
38 #define BOOST_BEAST_ZLIB_DETAIL_INFLATE_STREAM_HPP
39 
46 #include <boost/throw_exception.hpp>
47 #include <algorithm>
48 #include <array>
49 #include <cstdint>
50 #include <cstring>
51 #include <stdexcept>
52 
53 namespace boost {
54 namespace beast {
55 namespace zlib {
56 namespace detail {
57 
59 {
60 protected:
62  {
63  w_.reset(15);
64  }
65 
66  template<class = void> void doClear();
67  template<class = void> void doReset(int windowBits);
68  template<class = void> void doWrite(z_params& zs, Flush flush, error_code& ec);
69 
70  void
72  {
73  doReset(w_.bits());
74  }
75 
76 private:
77  enum Mode
78  {
79  HEAD, // i: waiting for magic header
80  FLAGS, // i: waiting for method and flags (gzip)
81  TIME, // i: waiting for modification time (gzip)
82  OS, // i: waiting for extra flags and operating system (gzip)
83  EXLEN, // i: waiting for extra length (gzip)
84  EXTRA, // i: waiting for extra bytes (gzip)
85  NAME, // i: waiting for end of file name (gzip)
86  COMMENT, // i: waiting for end of comment (gzip)
87  HCRC, // i: waiting for header crc (gzip)
88  TYPE, // i: waiting for type bits, including last-flag bit
89  TYPEDO, // i: same, but skip check to exit inflate on new block
90  STORED, // i: waiting for stored size (length and complement)
91  COPY_, // i/o: same as COPY below, but only first time in
92  COPY, // i/o: waiting for input or output to copy stored block
93  TABLE, // i: waiting for dynamic block table lengths
94  LENLENS, // i: waiting for code length code lengths
95  CODELENS, // i: waiting for length/lit and distance code lengths
96  LEN_, // i: same as LEN below, but only first time in
97  LEN, // i: waiting for length/lit/eob code
98  LENEXT, // i: waiting for length extra bits
99  DIST, // i: waiting for distance code
100  DISTEXT,// i: waiting for distance extra bits
101  MATCH, // o: waiting for output space to copy string
102  LIT, // o: waiting for output space to write literal
103  CHECK, // i: waiting for 32-bit check value
104  LENGTH, // i: waiting for 32-bit length (gzip)
105  DONE, // finished check, done -- remain here until reset
106  BAD, // got a data error -- remain here until reset
107  SYNC // looking for synchronization bytes to restart inflate()
108  };
109 
110  /* Structure for decoding tables. Each entry provides either the
111  information needed to do the operation requested by the code that
112  indexed that table entry, or it provides a pointer to another
113  table that indexes more bits of the code. op indicates whether
114  the entry is a pointer to another table, a literal, a length or
115  distance, an end-of-block, or an invalid code. For a table
116  pointer, the low four bits of op is the number of index bits of
117  that table. For a length or distance, the low four bits of op
118  is the number of extra bits to get after the code. bits is
119  the number of bits in this code or part of the code to drop off
120  of the bit buffer. val is the actual byte to output in the case
121  of a literal, the base length or distance, or the offset from
122  the current table to the next table. Each entry is four bytes.
123 
124  op values as set by inflate_table():
125 
126  00000000 - literal
127  0000tttt - table link, tttt != 0 is the number of table index bits
128  0001eeee - length or distance, eeee is the number of extra bits
129  01100000 - end of block
130  01000000 - invalid code
131  */
132  struct code
133  {
134  std::uint8_t op; // operation, extra bits, table bits
135  std::uint8_t bits; // bits in this part of the code
136  std::uint16_t val; // offset in table or code value
137  };
138 
139  /* Maximum size of the dynamic table. The maximum number of code
140  structures is 1444, which is the sum of 852 for literal/length codes
141  and 592 for distance codes. These values were found by exhaustive
142  searches using the program examples/enough.c found in the zlib
143  distribtution. The arguments to that program are the number of
144  symbols, the initial root table size, and the maximum bit length
145  of a code. "enough 286 9 15" for literal/length codes returns
146  returns 852, and "enough 30 6 15" for distance codes returns 592.
147  The initial root table size (9 or 6) is found in the fifth argument
148  of the inflate_table() calls in inflate.c and infback.c. If the
149  root table size is changed, then these maximum sizes would be need
150  to be recalculated and updated.
151  */
152  static std::uint16_t constexpr kEnoughLens = 852;
153  static std::uint16_t constexpr kEnoughDists = 592;
154  static std::uint16_t constexpr kEnough = kEnoughLens + kEnoughDists;
155 
156  struct codes
157  {
158  code const* lencode;
159  code const* distcode;
160  unsigned lenbits; // VFALCO use std::uint8_t
161  unsigned distbits;
162  };
163 
164  // Type of code to build for inflate_table()
165  enum class build
166  {
167  codes,
168  lens,
169  dists
170  };
171 
172  template<class = void>
173  static
174  void
175  inflate_table(
176  build type,
177  std::uint16_t* lens,
178  std::size_t codes,
179  code** table,
180  unsigned *bits,
181  std::uint16_t* work,
182  error_code& ec);
183 
184  template<class = void>
185  static
186  codes const&
187  get_fixed_tables();
188 
189  template<class = void>
190  void
191  fixedTables();
192 
193  template<class = void>
194  void
195  inflate_fast(ranges& r, error_code& ec);
196 
197  bitstream bi_;
198 
199  Mode mode_ = HEAD; // current inflate mode
200  int last_ = 0; // true if processing last block
201  unsigned dmax_ = 32768U; // zlib header max distance (INFLATE_STRICT)
202 
203  // sliding window
204  window w_;
205 
206  // for string and stored block copying
207  unsigned length_; // literal or length of data to copy
208  unsigned offset_; // distance back to copy string from
209 
210  // for table and code decoding
211  unsigned extra_; // extra bits needed
212 
213  // dynamic table building
214  unsigned ncode_; // number of code length code lengths
215  unsigned nlen_; // number of length code lengths
216  unsigned ndist_; // number of distance code lengths
217  unsigned have_; // number of code lengths in lens[]
218  unsigned short lens_[320]; // temporary storage for code lengths
219  unsigned short work_[288]; // work area for code table building
220  code codes_[kEnough]; // space for code tables
221  code *next_ = codes_; // next available space in codes[]
222  int back_ = -1; // bits back of last unprocessed length/lit
223  unsigned was_; // initial length of match
224 
225  // fixed and dynamic code tables
226  code const* lencode_ = codes_ ; // starting table for length/literal codes
227  code const* distcode_ = codes_; // starting table for distance codes
228  unsigned lenbits_; // index bits for lencode
229  unsigned distbits_; // index bits for distcode
230 };
231 
232 //------------------------------------------------------------------------------
233 
234 template<class>
235 void
237 doReset(int windowBits)
238 {
239  if(windowBits < 8 || windowBits > 15)
240  BOOST_THROW_EXCEPTION(std::domain_error{
241  "windowBits out of range"});
242  w_.reset(windowBits);
243 
244  bi_.flush();
245  mode_ = HEAD;
246  last_ = 0;
247  dmax_ = 32768U;
248  lencode_ = codes_;
249  distcode_ = codes_;
250  next_ = codes_;
251  back_ = -1;
252 }
253 
254 template<class>
255 void
258 {
259 }
260 
261 template<class>
262 void
265 {
266  ranges r;
267  r.in.first = reinterpret_cast<
268  std::uint8_t const*>(zs.next_in);
269  r.in.last = r.in.first + zs.avail_in;
270  r.in.next = r.in.first;
271  r.out.first = reinterpret_cast<
272  std::uint8_t*>(zs.next_out);
273  r.out.last = r.out.first + zs.avail_out;
274  r.out.next = r.out.first;
275 
276  auto const done =
277  [&]
278  {
279  /*
280  Return from inflate(), updating the total counts and the check value.
281  If there was no progress during the inflate() call, return a buffer
282  error. Call updatewindow() to create and/or update the window state.
283  Note: a memory error from inflate() is non-recoverable.
284  */
285 
286 
287  // VFALCO TODO Don't allocate update the window unless necessary
288  if(/*wsize_ ||*/ (r.out.used() && mode_ < BAD &&
289  (mode_ < CHECK || flush != Flush::finish)))
290  w_.write(r.out.first, r.out.used());
291 
292  zs.next_in = r.in.next;
293  zs.avail_in = r.in.avail();
294  zs.next_out = r.out.next;
295  zs.avail_out = r.out.avail();
296  zs.total_in += r.in.used();
297  zs.total_out += r.out.used();
298  zs.data_type = bi_.size() + (last_ ? 64 : 0) +
299  (mode_ == TYPE ? 128 : 0) +
300  (mode_ == LEN_ || mode_ == COPY_ ? 256 : 0);
301 
302  if(((! r.in.used() && ! r.out.used()) ||
303  flush == Flush::finish) && ! ec)
304  ec = error::need_buffers;
305  };
306  auto const err =
307  [&](error e)
308  {
309  ec = e;
310  mode_ = BAD;
311  };
312 
313  if(mode_ == TYPE)
314  mode_ = TYPEDO;
315 
316  for(;;)
317  {
318  switch(mode_)
319  {
320  case HEAD:
321  mode_ = TYPEDO;
322  break;
323 
324  case TYPE:
325  if(flush == Flush::block || flush == Flush::trees)
326  return done();
327  // fall through
328 
329  case TYPEDO:
330  {
331  if(last_)
332  {
333  bi_.flush_byte();
334  mode_ = CHECK;
335  break;
336  }
337  if(! bi_.fill(3, r.in.next, r.in.last))
338  return done();
339  std::uint8_t v;
340  bi_.read(v, 1);
341  last_ = v != 0;
342  bi_.read(v, 2);
343  switch(v)
344  {
345  case 0:
346  // uncompressed block
347  mode_ = STORED;
348  break;
349  case 1:
350  // fixed Huffman table
351  fixedTables();
352  mode_ = LEN_; /* decode codes */
353  if(flush == Flush::trees)
354  return done();
355  break;
356  case 2:
357  // dynamic Huffman table
358  mode_ = TABLE;
359  break;
360 
361  default:
362  return err(error::invalid_block_type);
363  }
364  break;
365  }
366 
367  case STORED:
368  {
369  bi_.flush_byte();
370  std::uint32_t v;
371  if(! bi_.fill(32, r.in.next, r.in.last))
372  return done();
373  bi_.peek(v, 32);
374  length_ = v & 0xffff;
375  if(length_ != ((v >> 16) ^ 0xffff))
376  return err(error::invalid_stored_length);
377  // flush instead of read, otherwise
378  // undefined right shift behavior.
379  bi_.flush();
380  mode_ = COPY_;
381  if(flush == Flush::trees)
382  return done();
383  // fall through
384  }
385 
386  case COPY_:
387  mode_ = COPY;
388  // fall through
389 
390  case COPY:
391  {
392  auto copy = length_;
393  if(copy == 0)
394  {
395  mode_ = TYPE;
396  break;
397  }
398  copy = clamp(copy, r.in.avail());
399  copy = clamp(copy, r.out.avail());
400  if(copy == 0)
401  return done();
402  std::memcpy(r.out.next, r.in.next, copy);
403  r.in.next += copy;
404  r.out.next += copy;
405  length_ -= copy;
406  break;
407  }
408 
409  case TABLE:
410  if(! bi_.fill(5 + 5 + 4, r.in.next, r.in.last))
411  return done();
412  bi_.read(nlen_, 5);
413  nlen_ += 257;
414  bi_.read(ndist_, 5);
415  ndist_ += 1;
416  bi_.read(ncode_, 4);
417  ncode_ += 4;
418  if(nlen_ > 286 || ndist_ > 30)
419  return err(error::too_many_symbols);
420  have_ = 0;
421  mode_ = LENLENS;
422  // fall through
423 
424  case LENLENS:
425  {
426  static std::array<std::uint8_t, 19> constexpr order = {{
427  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}};
428  while(have_ < ncode_)
429  {
430  if(! bi_.fill(3, r.in.next, r.in.last))
431  return done();
432  bi_.read(lens_[order[have_]], 3);
433  ++have_;
434  }
435  while(have_ < order.size())
436  lens_[order[have_++]] = 0;
437 
438  next_ = &codes_[0];
439  lencode_ = next_;
440  lenbits_ = 7;
441  inflate_table(build::codes, &lens_[0],
442  order.size(), &next_, &lenbits_, work_, ec);
443  if(ec)
444  {
445  mode_ = BAD;
446  break;
447  }
448  have_ = 0;
449  mode_ = CODELENS;
450  // fall through
451  }
452 
453  case CODELENS:
454  {
455  while(have_ < nlen_ + ndist_)
456  {
457  std::uint16_t v;
458  if(! bi_.fill(lenbits_, r.in.next, r.in.last))
459  return done();
460  bi_.peek(v, lenbits_);
461  auto cp = &lencode_[v];
462  if(cp->val < 16)
463  {
464  bi_.drop(cp->bits);
465  lens_[have_++] = cp->val;
466  }
467  else
468  {
469  std::uint16_t len;
470  std::uint16_t copy;
471  if(cp->val == 16)
472  {
473  if(! bi_.fill(cp->bits + 2, r.in.next, r.in.last))
474  return done();
475  bi_.drop(cp->bits);
476  if(have_ == 0)
478  bi_.read(copy, 2);
479  len = lens_[have_ - 1];
480  copy += 3;
481 
482  }
483  else if(cp->val == 17)
484  {
485  if(! bi_.fill(cp->bits + 3, r.in.next, r.in.last))
486  return done();
487  bi_.drop(cp->bits);
488  bi_.read(copy, 3);
489  len = 0;
490  copy += 3;
491  }
492  else
493  {
494  if(! bi_.fill(cp->bits + 7, r.in.next, r.in.last))
495  return done();
496  bi_.drop(cp->bits);
497  bi_.read(copy, 7);
498  len = 0;
499  copy += 11;
500  }
501  if(have_ + copy > nlen_ + ndist_)
503  std::fill(&lens_[have_], &lens_[have_ + copy], len);
504  have_ += copy;
505  copy = 0;
506  }
507  }
508  // handle error breaks in while
509  if(mode_ == BAD)
510  break;
511  // check for end-of-block code (better have one)
512  if(lens_[256] == 0)
513  return err(error::missing_eob);
514  /* build code tables -- note: do not change the lenbits or distbits
515  values here (9 and 6) without reading the comments in inftrees.hpp
516  concerning the kEnough constants, which depend on those values */
517  next_ = &codes_[0];
518  lencode_ = next_;
519  lenbits_ = 9;
520  inflate_table(build::lens, &lens_[0],
521  nlen_, &next_, &lenbits_, work_, ec);
522  if(ec)
523  {
524  mode_ = BAD;
525  return;
526  }
527  distcode_ = next_;
528  distbits_ = 6;
529  inflate_table(build::dists, lens_ + nlen_,
530  ndist_, &next_, &distbits_, work_, ec);
531  if(ec)
532  {
533  mode_ = BAD;
534  return;
535  }
536  mode_ = LEN_;
537  if(flush == Flush::trees)
538  return done();
539  // fall through
540  }
541 
542  case LEN_:
543  mode_ = LEN;
544  // fall through
545 
546  case LEN:
547  {
548  if(r.in.avail() >= 6 && r.out.avail() >= 258)
549  {
550  inflate_fast(r, ec);
551  if(ec)
552  {
553  mode_ = BAD;
554  return;
555  }
556  if(mode_ == TYPE)
557  back_ = -1;
558  break;
559  }
560  if(! bi_.fill(lenbits_, r.in.next, r.in.last))
561  return done();
562  std::uint16_t v;
563  back_ = 0;
564  bi_.peek(v, lenbits_);
565  auto cp = &lencode_[v];
566  if(cp->op && (cp->op & 0xf0) == 0)
567  {
568  auto prev = cp;
569  if(! bi_.fill(prev->bits + prev->op, r.in.next, r.in.last))
570  return done();
571  bi_.peek(v, prev->bits + prev->op);
572  cp = &lencode_[prev->val + (v >> prev->bits)];
573  bi_.drop(prev->bits + cp->bits);
574  back_ += prev->bits + cp->bits;
575  }
576  else
577  {
578  bi_.drop(cp->bits);
579  back_ += cp->bits;
580  }
581  length_ = cp->val;
582  if(cp->op == 0)
583  {
584  mode_ = LIT;
585  break;
586  }
587  if(cp->op & 32)
588  {
589  back_ = -1;
590  mode_ = TYPE;
591  break;
592  }
593  if(cp->op & 64)
594  return err(error::invalid_literal_length);
595  extra_ = cp->op & 15;
596  mode_ = LENEXT;
597  // fall through
598  }
599 
600  case LENEXT:
601  if(extra_)
602  {
603  if(! bi_.fill(extra_, r.in.next, r.in.last))
604  return done();
605  std::uint16_t v;
606  bi_.read(v, extra_);
607  length_ += v;
608  back_ += extra_;
609  }
610  was_ = length_;
611  mode_ = DIST;
612  // fall through
613 
614  case DIST:
615  {
616  if(! bi_.fill(distbits_, r.in.next, r.in.last))
617  return done();
618  std::uint16_t v;
619  bi_.peek(v, distbits_);
620  auto cp = &distcode_[v];
621  if((cp->op & 0xf0) == 0)
622  {
623  auto prev = cp;
624  if(! bi_.fill(prev->bits + prev->op, r.in.next, r.in.last))
625  return done();
626  bi_.peek(v, prev->bits + prev->op);
627  cp = &distcode_[prev->val + (v >> prev->bits)];
628  bi_.drop(prev->bits + cp->bits);
629  back_ += prev->bits + cp->bits;
630  }
631  else
632  {
633  bi_.drop(cp->bits);
634  back_ += cp->bits;
635  }
636  if(cp->op & 64)
637  return err(error::invalid_distance_code);
638  offset_ = cp->val;
639  extra_ = cp->op & 15;
640  mode_ = DISTEXT;
641  // fall through
642  }
643 
644  case DISTEXT:
645  if(extra_)
646  {
647  std::uint16_t v;
648  if(! bi_.fill(extra_, r.in.next, r.in.last))
649  return done();
650  bi_.read(v, extra_);
651  offset_ += v;
652  back_ += extra_;
653  }
654 #ifdef INFLATE_STRICT
655  if(offset_ > dmax_)
656  return err(error::invalid_distance);
657 #endif
658  mode_ = MATCH;
659  // fall through
660 
661  case MATCH:
662  {
663  if(! r.out.avail())
664  return done();
665  if(offset_ > r.out.used())
666  {
667  // copy from window
668  auto offset = static_cast<std::uint16_t>(
669  offset_ - r.out.used());
670  if(offset > w_.size())
671  return err(error::invalid_distance);
672  auto const n = clamp(clamp(
673  length_, offset), r.out.avail());
674  w_.read(r.out.next, offset, n);
675  r.out.next += n;
676  length_ -= n;
677  }
678  else
679  {
680  // copy from output
681  auto in = r.out.next - offset_;
682  auto n = clamp(length_, r.out.avail());
683  length_ -= n;
684  while(n--)
685  *r.out.next++ = *in++;
686  }
687  if(length_ == 0)
688  mode_ = LEN;
689  break;
690  }
691 
692  case LIT:
693  {
694  if(! r.out.avail())
695  return done();
696  auto const v = static_cast<std::uint8_t>(length_);
697  *r.out.next++ = v;
698  mode_ = LEN;
699  break;
700  }
701 
702  case CHECK:
703  mode_ = DONE;
704  // fall through
705 
706  case DONE:
708  return done();
709 
710  case BAD:
711  return done();
712 
713  case SYNC:
714  default:
715  BOOST_THROW_EXCEPTION(std::logic_error{
716  "stream error"});
717  }
718  }
719 }
720 
721 //------------------------------------------------------------------------------
722 
723 /* Build a set of tables to decode the provided canonical Huffman code.
724  The code lengths are lens[0..codes-1]. The result starts at *table,
725  whose indices are 0..2^bits-1. work is a writable array of at least
726  lens shorts, which is used as a work area. type is the type of code
727  to be generated, build::codes, build::lens, or build::dists. On
728  return, zero is success, -1 is an invalid code, and +1 means that
729  kEnough isn't enough. table on return points to the next available
730  entry's address. bits is the requested root table index bits, and
731  on return it is the actual root table index bits. It will differ if
732  the request is greater than the longest code or if it is less than
733  the shortest code.
734 */
735 template<class>
736 void
737 inflate_stream::
738 inflate_table(
739  build type,
740  std::uint16_t* lens,
741  std::size_t codes,
742  code** table,
743  unsigned *bits,
744  std::uint16_t* work,
745  error_code& ec)
746 {
747  unsigned len; // a code's length in bits
748  unsigned sym; // index of code symbols
749  unsigned min, max; // minimum and maximum code lengths
750  unsigned root; // number of index bits for root table
751  unsigned curr; // number of index bits for current table
752  unsigned drop; // code bits to drop for sub-table
753  int left; // number of prefix codes available
754  unsigned used; // code entries in table used
755  unsigned huff; // Huffman code
756  unsigned incr; // for incrementing code, index
757  unsigned fill; // index for replicating entries
758  unsigned low; // low bits for current root entry
759  unsigned mask; // mask for low root bits
760  code here; // table entry for duplication
761  code *next; // next available space in table
762  std::uint16_t const* base; // base value table to use
763  std::uint16_t const* extra; // extra bits table to use
764  int end; // use base and extra for symbol > end
765  std::uint16_t count[15+1]; // number of codes of each length
766  std::uint16_t offs[15+1]; // offsets in table for each length
767 
768  // Length codes 257..285 base
769  static std::uint16_t constexpr lbase[31] = {
770  3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
771  35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
772 
773  // Length codes 257..285 extra
774  static std::uint16_t constexpr lext[31] = {
775  16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
776  19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78};
777 
778  // Distance codes 0..29 base
779  static std::uint16_t constexpr dbase[32] = {
780  1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
781  257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
782  8193, 12289, 16385, 24577, 0, 0};
783 
784  // Distance codes 0..29 extra
785  static std::uint16_t constexpr dext[32] = {
786  16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
787  23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
788  28, 28, 29, 29, 64, 64};
789 
790  /*
791  Process a set of code lengths to create a canonical Huffman code. The
792  code lengths are lens[0..codes-1]. Each length corresponds to the
793  symbols 0..codes-1. The Huffman code is generated by first sorting the
794  symbols by length from short to long, and retaining the symbol order
795  for codes with equal lengths. Then the code starts with all zero bits
796  for the first code of the shortest length, and the codes are integer
797  increments for the same length, and zeros are appended as the length
798  increases. For the deflate format, these bits are stored backwards
799  from their more natural integer increment ordering, and so when the
800  decoding tables are built in the large loop below, the integer codes
801  are incremented backwards.
802 
803  This routine assumes, but does not check, that all of the entries in
804  lens[] are in the range 0..15. The caller must assure this.
805  1..15 is interpreted as that code length. zero means that that
806  symbol does not occur in this code.
807 
808  The codes are sorted by computing a count of codes for each length,
809  creating from that a table of starting indices for each length in the
810  sorted table, and then entering the symbols in order in the sorted
811  table. The sorted table is work[], with that space being provided by
812  the caller.
813 
814  The length counts are used for other purposes as well, i.e. finding
815  the minimum and maximum length codes, determining if there are any
816  codes at all, checking for a valid set of lengths, and looking ahead
817  at length counts to determine sub-table sizes when building the
818  decoding tables.
819  */
820 
821  /* accumulate lengths for codes (assumes lens[] all in 0..15) */
822  for (len = 0; len <= 15; len++)
823  count[len] = 0;
824  for (sym = 0; sym < codes; sym++)
825  count[lens[sym]]++;
826 
827  /* bound code lengths, force root to be within code lengths */
828  root = *bits;
829  for (max = 15; max >= 1; max--)
830  if (count[max] != 0)
831  break;
832  if (root > max)
833  root = max;
834  if (max == 0)
835  { /* no symbols to code at all */
836  here.op = (std::uint8_t)64; /* invalid code marker */
837  here.bits = (std::uint8_t)1;
838  here.val = (std::uint16_t)0;
839  *(*table)++ = here; /* make a table to force an error */
840  *(*table)++ = here;
841  *bits = 1;
842  return; /* no symbols, but wait for decoding to report error */
843  }
844  for (min = 1; min < max; min++)
845  if (count[min] != 0)
846  break;
847  if (root < min)
848  root = min;
849 
850  /* check for an over-subscribed or incomplete set of lengths */
851  left = 1;
852  for (len = 1; len <= 15; len++)
853  {
854  left <<= 1;
855  left -= count[len];
856  if (left < 0)
857  {
859  return;
860  }
861  }
862  if (left > 0 && (type == build::codes || max != 1))
863  {
865  return;
866  }
867 
868  /* generate offsets into symbol table for each length for sorting */
869  offs[1] = 0;
870  for (len = 1; len < 15; len++)
871  offs[len + 1] = offs[len] + count[len];
872 
873  /* sort symbols by length, by symbol order within each length */
874  for (sym = 0; sym < codes; sym++)
875  if (lens[sym] != 0)
876  work[offs[lens[sym]]++] = (std::uint16_t)sym;
877 
878  /*
879  Create and fill in decoding tables. In this loop, the table being
880  filled is at next and has curr index bits. The code being used is huff
881  with length len. That code is converted to an index by dropping drop
882  bits off of the bottom. For codes where len is less than drop + curr,
883  those top drop + curr - len bits are incremented through all values to
884  fill the table with replicated entries.
885 
886  root is the number of index bits for the root table. When len exceeds
887  root, sub-tables are created pointed to by the root entry with an index
888  of the low root bits of huff. This is saved in low to check for when a
889  new sub-table should be started. drop is zero when the root table is
890  being filled, and drop is root when sub-tables are being filled.
891 
892  When a new sub-table is needed, it is necessary to look ahead in the
893  code lengths to determine what size sub-table is needed. The length
894  counts are used for this, and so count[] is decremented as codes are
895  entered in the tables.
896 
897  used keeps track of how many table entries have been allocated from the
898  provided *table space. It is checked for build::lens and DIST tables against
899  the constants kEnoughLens and kEnoughDists to guard against changes in
900  the initial root table size constants. See the comments in inftrees.hpp
901  for more information.
902 
903  sym increments through all symbols, and the loop terminates when
904  all codes of length max, i.e. all codes, have been processed. This
905  routine permits incomplete codes, so another loop after this one fills
906  in the rest of the decoding tables with invalid code markers.
907  */
908 
909  /* set up for code type */
910  switch (type)
911  {
912  case build::codes:
913  base = extra = work; /* dummy value--not used */
914  end = 19;
915  break;
916  case build::lens:
917  base = lbase;
918  base -= 257;
919  extra = lext;
920  extra -= 257;
921  end = 256;
922  break;
923  default: /* build::dists */
924  base = dbase;
925  extra = dext;
926  end = -1;
927  }
928 
929  /* initialize state for loop */
930  huff = 0; /* starting code */
931  sym = 0; /* starting code symbol */
932  len = min; /* starting code length */
933  next = *table; /* current table to fill in */
934  curr = root; /* current table index bits */
935  drop = 0; /* current bits to drop from code for index */
936  low = (unsigned)(-1); /* trigger new sub-table when len > root */
937  used = 1U << root; /* use root table entries */
938  mask = used - 1; /* mask for comparing low */
939 
940  auto const not_enough = []
941  {
942  BOOST_THROW_EXCEPTION(std::logic_error{
943  "insufficient output size when inflating tables"});
944  };
945 
946  // check available table space
947  if ((type == build::lens && used > kEnoughLens) ||
948  (type == build::dists && used > kEnoughDists))
949  return not_enough();
950 
951  /* process all codes and make table entries */
952  for (;;)
953  {
954  /* create table entry */
955  here.bits = (std::uint8_t)(len - drop);
956  if ((int)(work[sym]) < end)
957  {
958  here.op = (std::uint8_t)0;
959  here.val = work[sym];
960  }
961  else if ((int)(work[sym]) > end)
962  {
963  here.op = (std::uint8_t)(extra[work[sym]]);
964  here.val = base[work[sym]];
965  }
966  else
967  {
968  here.op = (std::uint8_t)(32 + 64); /* end of block */
969  here.val = 0;
970  }
971 
972  /* replicate for those indices with low len bits equal to huff */
973  incr = 1U << (len - drop);
974  fill = 1U << curr;
975  min = fill; /* save offset to next table */
976  do
977  {
978  fill -= incr;
979  next[(huff >> drop) + fill] = here;
980  } while (fill != 0);
981 
982  /* backwards increment the len-bit code huff */
983  incr = 1U << (len - 1);
984  while (huff & incr)
985  incr >>= 1;
986  if (incr != 0)
987  {
988  huff &= incr - 1;
989  huff += incr;
990  }
991  else
992  huff = 0;
993 
994  /* go to next symbol, update count, len */
995  sym++;
996  if (--(count[len]) == 0)
997  {
998  if (len == max) break;
999  len = lens[work[sym]];
1000  }
1001 
1002  /* create new sub-table if needed */
1003  if (len > root && (huff & mask) != low)
1004  {
1005  /* if first time, transition to sub-tables */
1006  if (drop == 0)
1007  drop = root;
1008 
1009  /* increment past last table */
1010  next += min; /* here min is 1 << curr */
1011 
1012  /* determine length of next table */
1013  curr = len - drop;
1014  left = (int)(1 << curr);
1015  while (curr + drop < max)
1016  {
1017  left -= count[curr + drop];
1018  if (left <= 0) break;
1019  curr++;
1020  left <<= 1;
1021  }
1022 
1023  /* check for enough space */
1024  used += 1U << curr;
1025  if ((type == build::lens && used > kEnoughLens) ||
1026  (type == build::dists && used > kEnoughDists))
1027  return not_enough();
1028 
1029  /* point entry in root table to sub-table */
1030  low = huff & mask;
1031  (*table)[low].op = (std::uint8_t)curr;
1032  (*table)[low].bits = (std::uint8_t)root;
1033  (*table)[low].val = (std::uint16_t)(next - *table);
1034  }
1035  }
1036 
1037  /* fill in remaining table entry if code is incomplete (guaranteed to have
1038  at most one remaining entry, since if the code is incomplete, the
1039  maximum code length that was allowed to get this far is one bit) */
1040  if (huff != 0)
1041  {
1042  here.op = 64; // invalid code marker
1043  here.bits = (std::uint8_t)(len - drop);
1044  here.val = 0;
1045  next[huff] = here;
1046  }
1047 
1048  *table += used;
1049  *bits = root;
1050 }
1051 
1052 template<class>
1053 auto
1054 inflate_stream::
1055 get_fixed_tables() ->
1056  codes const&
1057 {
1058  struct fixed_codes : codes
1059  {
1060  code len_[512];
1061  code dist_[32];
1062 
1063  fixed_codes()
1064  {
1065  lencode = len_;
1066  lenbits = 9;
1067  distcode = dist_;
1068  distbits = 5;
1069 
1070  std::uint16_t lens[320];
1071  std::uint16_t work[288];
1072 
1073  std::fill(&lens[ 0], &lens[144], std::uint16_t{8});
1074  std::fill(&lens[144], &lens[256], std::uint16_t{9});
1075  std::fill(&lens[256], &lens[280], std::uint16_t{7});
1076  std::fill(&lens[280], &lens[288], std::uint16_t{8});
1077 
1078  {
1079  error_code ec;
1080  auto next = &len_[0];
1081  inflate_table(build::lens,
1082  lens, 288, &next, &lenbits, work, ec);
1083  if(ec)
1084  BOOST_THROW_EXCEPTION(std::logic_error{ec.message()});
1085  }
1086 
1087  // VFALCO These fixups are from ZLib
1088  len_[ 99].op = 64;
1089  len_[227].op = 64;
1090  len_[355].op = 64;
1091  len_[483].op = 64;
1092 
1093  {
1094  error_code ec;
1095  auto next = &dist_[0];
1096  std::fill(&lens[0], &lens[32], std::uint16_t{5});
1097  inflate_table(build::dists,
1098  lens, 32, &next, &distbits, work, ec);
1099  if(ec)
1100  BOOST_THROW_EXCEPTION(std::logic_error{ec.message()});
1101  }
1102  }
1103  };
1104 
1105  static fixed_codes const fc;
1106  return fc;
1107 }
1108 
1109 template<class>
1110 void
1111 inflate_stream::
1112 fixedTables()
1113 {
1114  auto const fc = get_fixed_tables();
1115  lencode_ = fc.lencode;
1116  lenbits_ = fc.lenbits;
1117  distcode_ = fc.distcode;
1118  distbits_ = fc.distbits;
1119 }
1120 
1121 /*
1122  Decode literal, length, and distance codes and write out the resulting
1123  literal and match bytes until either not enough input or output is
1124  available, an end-of-block is encountered, or a data error is encountered.
1125  When large enough input and output buffers are supplied to inflate(), for
1126  example, a 16K input buffer and a 64K output buffer, more than 95% of the
1127  inflate execution time is spent in this routine.
1128 
1129  Entry assumptions:
1130 
1131  state->mode_ == LEN
1132  zs.avail_in >= 6
1133  zs.avail_out >= 258
1134  start >= zs.avail_out
1135  state->bits_ < 8
1136 
1137  On return, state->mode_ is one of:
1138 
1139  LEN -- ran out of enough output space or enough available input
1140  TYPE -- reached end of block code, inflate() to interpret next block
1141  BAD -- error in block data
1142 
1143  Notes:
1144 
1145  - The maximum input bits used by a length/distance pair is 15 bits for the
1146  length code, 5 bits for the length extra, 15 bits for the distance code,
1147  and 13 bits for the distance extra. This totals 48 bits, or six bytes.
1148  Therefore if zs.avail_in >= 6, then there is enough input to avoid
1149  checking for available input while decoding.
1150 
1151  - The maximum bytes that a single length/distance pair can output is 258
1152  bytes, which is the maximum length that can be coded. inflate_fast()
1153  requires zs.avail_out >= 258 for each loop to avoid checking for
1154  output space.
1155 
1156  inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
1157  - Using bit fields for code structure
1158  - Different op definition to avoid & for extra bits (do & for table bits)
1159  - Three separate decoding do-loops for direct, window, and wnext == 0
1160  - Special case for distance > 1 copies to do overlapped load and store copy
1161  - Explicit branch predictions (based on measured branch probabilities)
1162  - Deferring match copy and interspersed it with decoding subsequent codes
1163  - Swapping literal/length else
1164  - Swapping window/direct else
1165  - Larger unrolled copy loops (three is about right)
1166  - Moving len -= 3 statement into middle of loop
1167  */
1168 template<class>
1169 void
1170 inflate_stream::
1171 inflate_fast(ranges& r, error_code& ec)
1172 {
1173  unsigned char const* last; // have enough input while in < last
1174  unsigned char *end; // while out < end, enough space available
1175  std::size_t op; // code bits, operation, extra bits, or window position, window bytes to copy
1176  unsigned len; // match length, unused bytes
1177  unsigned dist; // match distance
1178  unsigned const lmask =
1179  (1U << lenbits_) - 1; // mask for first level of length codes
1180  unsigned const dmask =
1181  (1U << distbits_) - 1; // mask for first level of distance codes
1182 
1183  last = r.in.next + (r.in.avail() - 5);
1184  end = r.out.next + (r.out.avail() - 257);
1185 
1186  /* decode literals and length/distances until end-of-block or not enough
1187  input data or output space */
1188  do
1189  {
1190  if(bi_.size() < 15)
1191  bi_.fill_16(r.in.next);
1192  auto cp = &lencode_[bi_.peek_fast() & lmask];
1193  dolen:
1194  bi_.drop(cp->bits);
1195  op = (unsigned)(cp->op);
1196  if(op == 0)
1197  {
1198  // literal
1199  *r.out.next++ = (unsigned char)(cp->val);
1200  }
1201  else if(op & 16)
1202  {
1203  // length base
1204  len = (unsigned)(cp->val);
1205  op &= 15; // number of extra bits
1206  if(op)
1207  {
1208  if(bi_.size() < op)
1209  bi_.fill_8(r.in.next);
1210  len += (unsigned)bi_.peek_fast() & ((1U << op) - 1);
1211  bi_.drop(op);
1212  }
1213  if(bi_.size() < 15)
1214  bi_.fill_16(r.in.next);
1215  cp = &distcode_[bi_.peek_fast() & dmask];
1216  dodist:
1217  bi_.drop(cp->bits);
1218  op = (unsigned)(cp->op);
1219  if(op & 16)
1220  {
1221  // distance base
1222  dist = (unsigned)(cp->val);
1223  op &= 15; // number of extra bits
1224  if(bi_.size() < op)
1225  {
1226  bi_.fill_8(r.in.next);
1227  if(bi_.size() < op)
1228  bi_.fill_8(r.in.next);
1229  }
1230  dist += (unsigned)bi_.peek_fast() & ((1U << op) - 1);
1231 #ifdef INFLATE_STRICT
1232  if(dist > dmax_)
1233  {
1235  mode_ = BAD;
1236  break;
1237  }
1238 #endif
1239  bi_.drop(op);
1240 
1241  op = r.out.used();
1242  if(dist > op)
1243  {
1244  // copy from window
1245  op = dist - op; // distance back in window
1246  if(op > w_.size())
1247  {
1249  mode_ = BAD;
1250  break;
1251  }
1252  auto const n = clamp(len, op);
1253  w_.read(r.out.next, op, n);
1254  r.out.next += n;
1255  len -= n;
1256  }
1257  if(len > 0)
1258  {
1259  // copy from output
1260  auto in = r.out.next - dist;
1261  auto n = clamp(len, r.out.avail());
1262  len -= n;
1263  while(n--)
1264  *r.out.next++ = *in++;
1265  }
1266  }
1267  else if((op & 64) == 0)
1268  {
1269  // 2nd level distance code
1270  cp = &distcode_[cp->val + (bi_.peek_fast() & ((1U << op) - 1))];
1271  goto dodist;
1272  }
1273  else
1274  {
1276  mode_ = BAD;
1277  break;
1278  }
1279  }
1280  else if((op & 64) == 0)
1281  {
1282  // 2nd level length code
1283  cp = &lencode_[cp->val + (bi_.peek_fast() & ((1U << op) - 1))];
1284  goto dolen;
1285  }
1286  else if(op & 32)
1287  {
1288  // end-of-block
1289  mode_ = TYPE;
1290  break;
1291  }
1292  else
1293  {
1295  mode_ = BAD;
1296  break;
1297  }
1298  }
1299  while(r.in.next < last && r.out.next < end);
1300 
1301  // return unused bytes (on entry, bits < 8, so in won't go too far back)
1302  bi_.rewind(r.in.next);
1303 }
1304 
1305 } // detail
1306 } // zlib
1307 } // beast
1308 } // boost
1309 
1310 #endif
iter_t first
Definition: ranges.hpp:58
Invalid distance too far back.
iter_t last
Definition: ranges.hpp:59
int data_type
Definition: zlib.hpp:109
std::size_t avail() const
Definition: ranges.hpp:78
void doClear()
Definition: inflate_stream.hpp:257
Definition: async_result.hpp:20
Definition: zlib.hpp:79
std::size_t total_out
Definition: zlib.hpp:107
Too many length or distance symbols.
range< true > in
Definition: ranges.hpp:84
inflate_stream()
Definition: inflate_stream.hpp:61
Flush
Definition: zlib.hpp:114
Definition: bitstream.hpp:49
void doReset()
Definition: inflate_stream.hpp:71
std::size_t total_in
Definition: zlib.hpp:95
boost::system::error_code error_code
The type of error code used by the library.
Definition: error.hpp:21
void const * next_in
Definition: zlib.hpp:85
void * next_out
Definition: zlib.hpp:99
Definition: ranges.hpp:48
void reset(int bits)
Definition: window.hpp:92
int bits() const
Definition: window.hpp:61
error
Definition: error.hpp:50
Definition: inflate_stream.hpp:58
iter_t next
Definition: ranges.hpp:60
std::size_t avail_out
Definition: zlib.hpp:103
void doWrite(z_params &zs, Flush flush, error_code &ec)
Definition: inflate_stream.hpp:264
std::size_t avail_in
Definition: zlib.hpp:91
U clamp(U u, V v)
Definition: ranges.hpp:92
std::size_t used() const
Definition: ranges.hpp:71
range< false > out
Definition: ranges.hpp:85
Definition: window.hpp:51
Missing end of block code.