SDSL  3.0.0
Succinct Data Structure Library
coder_elias_gamma.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef SDSL_CODER_ELIAS_GAMMA
9 #define SDSL_CODER_ELIAS_GAMMA
10 
11 #include <sdsl/int_vector.hpp>
12 
13 namespace sdsl
14 {
15 
16 namespace coder
17 {
18 
20 template <typename T = void>
22 {
23  public:
24  typedef uint64_t size_type;
25 
26  static struct impl
27  {
29 
33  uint32_t prefixsum[1 << 16];
34 
35  uint16_t prefixsum_8bit[(1 << 8) * 8];
36 
37  impl()
38  {
39  // initialize prefixsum
40  for (uint64_t x = 0; x < (1 << 16); ++x)
41  {
42  const uint64_t * w = &x; // copy of x
43  uint64_t value = 0;
44  uint16_t numbers = 0, offset = 0, offset2 = 0;
45  while ((x >> offset) != 0)
46  {
47  uint64_t len_1 = bits::read_unary_bounded(w, offset);
48  if (len_1 == 0)
49  {
50  offset += 1;
51  value += 1;
52  ++numbers;
53  }
54  else
55  {
56  offset2 = offset + len_1 + 1;
57  if (offset2 + len_1 <= 16)
58  {
59  value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
60  offset = offset2 + len_1;
61  ++numbers;
62  }
63  else
64  break;
65  }
66  }
67  uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
68  // number of decoded values,
69  // and the last 16 bit equals value of decoded prefix sum
70  result = (offset << 24) | (numbers << 16) | value;
71  if (value > 0) assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
72  prefixsum[x] = result;
73  }
74  // initialize prefixsum_8bit
75 
76  for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
77  {
78  for (uint64_t x = 0; x < (1 << 8); ++x)
79  {
80  const uint64_t * w = &x; // copy of x
81  uint64_t value = 0;
82  uint32_t numbers = 0, offset = 0, offset2 = 0;
83  while ((x >> offset) != 0 and numbers < maxi)
84  {
85  uint64_t len_1 = bits::read_unary_bounded(w, offset);
86  if (len_1 == 0)
87  {
88  offset += 1;
89  value += 1;
90  ++numbers;
91  }
92  else
93  {
94  offset2 = offset + len_1 + 1;
95  if (offset2 + len_1 <= 8)
96  {
97  value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
98  offset = offset2 + len_1;
99  ++numbers;
100  }
101  else
102  break;
103  }
104  }
105  uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
106  // number of decoded values,
107  // and the last 16 bit equals value of decoded prefix sum
108  result = (offset << 12) | (numbers << 8) | value;
109  prefixsum_8bit[idx++] = result;
110  }
111  }
112  }
113  } data;
114 
115  static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
116  static uint8_t encoding_length(uint64_t);
118  /* \param data Bitstring
119  * \param start_idx Starting index of the decoding.
120  * \param n Number of values to decode from the bitstring.
121  * \param it Iterator to decode the values.
122  */
123  template <bool t_sumup, bool t_inc, class t_iter>
124  static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
125 
128 
133  static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
134  static uint64_t decode_prefix_sum(const uint64_t * d,
135  const size_type start_idx,
136  const size_type end_idx,
137  size_type n);
138 
139  template <class int_vector>
140  static bool encode(const int_vector & v, int_vector & z);
141  template <class int_vector>
142  static bool decode(const int_vector & z, int_vector & v);
143 
145  /* \param x Positive integer to encode.
146  * \param z Raw data of vector to write the encoded form of x.
147  * \param start_idx Beginning bit index to write the encoded form ox x in z.
148  */
149  static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
150 
151  template <class int_vector>
152  static uint64_t * raw_data(int_vector & v)
153  {
154  return v.m_data;
155  }
156 };
157 
158 // \sa coder::elias_gamma::encoding_length
159 template <typename T>
160 inline uint8_t elias_gamma<T>::encoding_length(uint64_t w)
161 {
162  uint8_t len_1 = w ? bits::hi(w) : 64;
163  return 2 * len_1 + 1;
164 }
165 
166 template <typename T>
167 template <class int_vector>
168 inline bool elias_gamma<T>::encode(const int_vector & v, int_vector & z)
169 {
170  typedef typename int_vector::size_type size_type;
171  z.width(v.width());
172  size_type z_bit_size = 0;
173  uint64_t w;
174  const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
175  for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
176  {
177  if ((w = *it) == 0) { w = zero_val; }
178  z_bit_size += encoding_length(w);
179  }
180  z.bit_resize(z_bit_size); // Initial size of z
181  z.shrink_to_fit();
182  if (z_bit_size & 0x3F)
183  { // if z_bit_size % 64 != 0
184  *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
185  }
186  z_bit_size = 0;
187  uint64_t * z_data = z.m_data;
188  uint8_t offset = 0;
189  size_type len_1; // TODO: change to uint8_t and test it
190  for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
191  {
192  w = *it;
193  if (w == 0) { w = zero_val; }
194  // (number of bits to represent w)-1
195  if (!w)
196  {
197  len_1 = 64;
198  bits::write_int_and_move(z_data, 0ULL, offset, 64);
199  bits::write_int_and_move(z_data, 1ULL, offset, 1);
200  }
201  else
202  {
203  len_1 = bits::hi(w);
204  bits::write_int_and_move(z_data, 1ULL << len_1, offset, len_1 + 1);
205  }
206  if (len_1) { bits::write_int_and_move(z_data, w, offset, len_1); }
207  }
208  return true;
209 }
210 
211 template <typename T>
212 inline void elias_gamma<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
213 {
214  uint8_t len_1 = 0;
215  if (!x)
216  {
217  len_1 = 64;
218  bits::write_int_and_move(z, 0, offset, 64);
219  bits::write_int_and_move(z, 1, offset, 1);
220  }
221  else
222  {
223  len_1 = bits::hi(x);
224  bits::write_int_and_move(z, 1ULL << len_1, offset, len_1 + 1);
225  }
226  if (len_1) { bits::write_int_and_move(z, x, offset, len_1); }
227 }
228 
229 template <typename T>
230 template <class int_vector>
231 inline bool elias_gamma<T>::decode(const int_vector & z, int_vector & v)
232 {
233  typename int_vector::size_type len_1, n = 0;
234  const uint64_t * z_data = z.data();
235  const uint64_t * z_end = z.data() + (z.bit_size() >> 6);
236  uint8_t offset = 0;
237  while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
238  {
239  len_1 = bits::read_unary_and_move(z_data, offset);
240  if (len_1) { bits::move_right(z_data, offset, len_1); }
241  ++n;
242  }
243  v.width(z.width());
244  v.resize(n);
245  v.shrink_to_fit();
246  return decode<false, true>(z.data(), 0, n, v.begin());
247 }
248 
249 template <typename T>
250 template <bool t_sumup, bool t_inc, class t_iter>
251 inline uint64_t elias_gamma<T>::decode(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
252 {
253  d += (start_idx >> 6);
254  uint64_t value = 0;
255  size_type i = 0;
256  size_type len_1;
257  uint8_t offset = start_idx & 0x3F;
258  while (i++ < n)
259  { // while not all values are decoded
260  if (!t_sumup) value = 0;
261  len_1 = bits::read_unary_and_move(d, offset); // read length of x-1
262  if (!len_1) { value += 1; }
263  else
264  {
265  value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
266  }
267  if (t_inc) *(it++) = value;
268  }
269  return value;
270 }
271 
272 template <typename T>
273 inline uint64_t elias_gamma<T>::decode_prefix_sum(const uint64_t * d,
274  const size_type start_idx,
275  const size_type end_idx,
276  size_type n)
277 {
278  if (n == 0) return 0;
279  const uint64_t * lastdata = d + ((end_idx + 63) >> 6);
280  d += (start_idx >> 6);
281  uint64_t w = 0, value = 0;
282  int16_t buffered = 0, read = start_idx & 0x3F;
283  size_type i = 0;
284  if (n + read <= 64)
285  {
286  if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n]) return n;
287  }
288  else
289  { // n+read > 64
290  if ((*d >> read) == bits::lo_set[64 - read])
291  { // all bits are set to 1
292  value = 64 - read;
293  ++d;
294  n -= (64 - read);
295  read = 0;
296  while (n >= 64)
297  {
298  if (*d == 0xFFFFFFFFFFFFFFFFULL)
299  {
300  value += 64;
301  ++d;
302  n -= 64;
303  }
304  else
305  goto start_decoding;
306  }
307  // 0 <= n <= 64
308  if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
309  }
310  }
311 
312 start_decoding:
313  while (i < n)
314  {
315  while (buffered < 64 and d < lastdata)
316  {
317  fill_buffer:
318  w |= (((*d) >> read) << buffered);
319  if (read >= buffered)
320  {
321  ++d;
322  buffered += 64 - read;
323  read = 0;
324  }
325  else
326  { // read buffered
327  read += 64 - buffered;
328  buffered = 64;
329  }
330  }
331  uint32_t rbp = bits::lo(~w);
332  if (rbp > 0)
333  {
334  i += rbp;
335  value += rbp;
336  if (i >= n)
337  { // decoded too much
338  return value - (i - n); // return corrected value
339  }
340  assert((int64_t)buffered >= rbp);
341  buffered -= rbp;
342  w >>= rbp;
343  if (buffered < 16) goto fill_buffer;
344  }
345  {
346  // i < n
347  begin_decode:
348  uint32_t psum = elias_gamma<T>::data.prefixsum[w & 0x0000FFFF];
349  if (!psum or i + ((psum >> 16) & 0x00FF) > n)
350  {
351  if (w == 0)
352  { // buffer is not full
353  w |= (((*d) >> read) << buffered);
354  if (read >= buffered)
355  {
356  ++d;
357  buffered += 64 - read;
358  read = 0;
359  }
360  else
361  {
362  read += 64 - buffered;
363  buffered = 64;
364  };
365  if (!w)
366  {
367  w |= (((*d) >> read) << buffered);
368  if (read >= buffered)
369  {
370  ++d;
371  buffered += 64 - read;
372  read = 0;
373  }
374  else
375  {
376  read += 64 - buffered;
377  buffered = 64;
378  };
379  }
380  }
381  // assert(w>0);
382  uint16_t len_1 = bits::lo(w); // read length of length
383  buffered -= (len_1 + 1);
384  w >>= (len_1 + 1);
385  if (len_1 > buffered)
386  { // buffer is not full
387  w |= (((*d) >> read) << buffered);
388  if (read >= buffered)
389  {
390  ++d;
391  buffered += 64 - read;
392  read = 0;
393  }
394  else
395  {
396  read += 64 - buffered;
397  buffered = 64;
398  };
399  if (len_1 > buffered)
400  {
401  w |= (((*d) >> read) << buffered);
402  if (read >= buffered)
403  {
404  ++d;
405  buffered += 64 - read;
406  read = 0;
407  }
408  else
409  {
410  read += 64 - buffered;
411  buffered = 64;
412  };
413  }
414  }
415  value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << len_1);
416  buffered -= len_1;
417  if (len_1 < 64) { w >>= len_1; }
418  else
419  {
420  w = 0;
421  }
422  ++i;
423  if (i == n) return value;
424  if (buffered >= 16) goto begin_decode;
425  }
426  else
427  {
428  value += (psum & 0x0000FFFF);
429  i += ((psum >> 16) & 0x00FF);
430  if (i == n) return value;
431  buffered -= (psum >> 24);
432  w >>= (psum >> 24);
433  if (buffered >= 16) goto begin_decode;
434  }
435  }
436  };
437  return value;
438 }
439 
440 template <typename T>
441 inline uint64_t elias_gamma<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
442 {
443  if (n == 0) return 0;
444  d += (start_idx >> 6);
445  uint64_t value = 0;
446  size_type i = 0;
447  uint8_t offset = start_idx & 0x3F;
448 
449  if (n < 24)
450  {
451  if (n + offset <= 64)
452  {
453  if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n]) return n;
454  }
455  else
456  { // n+offset > 64
457  if ((*d >> offset) == bits::lo_set[64 - offset])
458  { // all bits are set to 1
459  value = 64 - offset;
460  ++d;
461  n -= (64 - offset);
462  offset = 0;
463  while (n >= 64)
464  {
465  if (*d == 0xFFFFFFFFFFFFFFFFULL)
466  {
467  value += 64;
468  ++d;
469  n -= 64;
470  }
471  else
472  {
473  uint8_t temp = bits::lo(~(*d));
474  value += temp;
475  n -= temp;
476  offset = temp;
477  goto start_decoding;
478  }
479  }
480  // 0 <= n <= 64
481  if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
482  }
483  }
484  }
485 
486 start_decoding:
487  while (i < n)
488  { // while not all values are decoded
489  // n-i values to decode
490 
491  if (((*d >> offset) & 0xF) == 0xF)
492  {
493  uint8_t maxdecode = n - i > 63 ? 63 : n - i;
494  uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
495  i += rbp;
496  value += rbp;
497  if (rbp + offset >= 64)
498  {
499  ++d;
500  offset = (rbp + offset) & 0x3F;
501  }
502  else
503  {
504  offset += rbp;
505  }
506  if (rbp == maxdecode) continue;
507  }
508 
509  while (i < n)
510  {
511  uint32_t psum = elias_gamma<T>::data.prefixsum[bits::read_int(d, offset, 16)];
512  if (psum == 0)
513  { // value does not fit in 16 bits
514  goto decode_single;
515  }
516  else if (i + ((psum >> 16) & 0x00FF) > n)
517  { // decoded too much
518  if (n - i <= 8)
519  {
520  psum = elias_gamma<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
521  if (psum > 0)
522  {
523  value += (psum & 0xFF);
524  i += ((psum >> 8) & 0xF);
525  offset += (psum >> 12);
526  if (offset >= 64)
527  {
528  offset &= 0x3F;
529  ++d;
530  }
531  }
532  }
533  break;
534  }
535  else
536  {
537  value += (psum & 0x0000FFFF);
538  i += ((psum >> 16) & 0x00FF);
539  offset += (psum >> 24);
540  if (offset >= 64)
541  {
542  offset &= 0x3F;
543  ++d;
544  }
545  }
546  }
547  if (i < n)
548  {
549  decode_single:
550  i++;
551  uint16_t len_1 = bits::read_unary_and_move(d, offset); // read length of length of x
552  value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
553  }
554  }
555  return value;
556 }
557 
558 template <typename T>
560 
561 } // end namespace coder
562 
563 } // end namespace sdsl
564 #endif
A class to encode and decode between Elias- and binary code.
static bool encode(const int_vector &v, int_vector &z)
static uint64_t * raw_data(int_vector &v)
static struct sdsl::coder::elias_gamma::impl data
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Elias gamma encoded integers beginning at start_idx in the bitstring "data" and return the s...
static uint8_t encoding_length(uint64_t)
static const uint8_t min_codeword_length
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
A generic vector class for integers of width .
Definition: int_vector.hpp:253
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:788
int_vector_size_type size_type
Definition: int_vector.hpp:266
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:571
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:530
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:619
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
static SDSL_CONSTEXPR uint64_t read_unary_and_move(const uint64_t *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition: bits.hpp:857
static SDSL_CONSTEXPR uint64_t read_int(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition: bits.hpp:783
static SDSL_CONSTEXPR void move_right(const uint64_t *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition: bits.hpp:892
static SDSL_CONSTEXPR void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:752
static SDSL_CONSTEXPR uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition: bits.hpp:696
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197
static SDSL_CONSTEXPR uint64_t read_unary_bounded(const uint64_t *word, uint8_t offset=0)
Definition: bits.hpp:846
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
static SDSL_CONSTEXPR uint64_t read_int_and_move(const uint64_t *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition: bits.hpp:805
static SDSL_CONSTEXPR uint64_t read_int_bounded(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Definition: bits.hpp:799