SDSL  3.0.0
Succinct Data Structure Library
coder_elias_delta.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef SDSL_CODER_ELIAS_DELTA
9 #define SDSL_CODER_ELIAS_DELTA
10 
11 #include <sdsl/int_vector.hpp>
12 
13 namespace sdsl
14 {
15 
16 namespace coder
17 {
18 
20 template <typename T = void>
22 {
23  public:
24  typedef uint64_t size_type;
25 
26  static struct impl
27  {
29 
33  uint32_t prefixsum[1 << 16];
34 
35  uint16_t prefixsum_8bit[(1 << 8) * 8];
36 
37  impl()
38  {
39  // initialize prefixsum
40  for (uint64_t x = 0; x < (1 << 16); ++x)
41  {
42  const uint64_t * w = &x; // copy of x
43  uint64_t value = 0;
44  uint16_t numbers = 0, offset = 0, offset2 = 0;
45  while ((x >> offset) != 0)
46  {
47  uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
48  if (len_1_len == 0)
49  {
50  offset += 1;
51  value += 1;
52  ++numbers;
53  }
54  else
55  {
56  offset2 = offset + len_1_len + 1;
57  len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
58  offset2 += len_1_len;
59  if (offset2 + len - 1 <= 16)
60  {
61  value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
62  offset = offset2 + len - 1;
63  ++numbers;
64  }
65  else
66  break;
67  }
68  }
69  uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
70  // number of decoded values,
71  // and the last 16 bit equals value of decoded prefix sum
72  result = (offset << 24) | (numbers << 16) | value;
73  if (value > 0) assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
74  prefixsum[x] = result;
75  }
76  // initialize prefixsum_8bit
77 
78  for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
79  {
80  for (uint64_t x = 0; x < (1 << 8); ++x)
81  {
82  const uint64_t * w = &x; // copy of x
83  uint64_t value = 0;
84  uint32_t numbers = 0, offset = 0, offset2 = 0;
85  while ((x >> offset) != 0 and numbers < maxi)
86  {
87  uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
88  if (len_1_len == 0)
89  {
90  offset += 1;
91  value += 1;
92  ++numbers;
93  }
94  else
95  {
96  offset2 = offset + len_1_len + 1;
97  len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
98  offset2 += len_1_len;
99  if (offset2 + len - 1 <= 8)
100  {
101  value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
102  offset = offset2 + len - 1;
103  ++numbers;
104  }
105  else
106  break;
107  }
108  }
109  uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
110  // number of decoded values,
111  // and the last 16 bit equals value of decoded prefix sum
112  result = (offset << 8) | (numbers << 4) | value;
113  prefixsum_8bit[idx++] = result;
114  }
115  }
116  }
117  } data;
118 
119  static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
120  static uint8_t encoding_length(uint64_t);
122  /* \param data Bitstring
123  * \param start_idx Starting index of the decoding.
124  * \param n Number of values to decode from the bitstring.
125  * \param it Iterator to decode the values.
126  */
127  template <bool t_sumup, bool t_inc, class t_iter>
128  static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
129 
132 
137  static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
138  static uint64_t decode_prefix_sum(const uint64_t * d,
139  const size_type start_idx,
140  const size_type end_idx,
141  size_type n);
142 
143  template <class int_vector>
144  static bool encode(const int_vector & v, int_vector & z);
145  template <class int_vector>
146  static bool decode(const int_vector & z, int_vector & v);
147 
149  /* \param x Positive integer to encode.
150  * \param z Raw data of vector to write the encoded form of x.
151  * \param start_idx Beginning bit index to write the encoded form ox x in z.
152  */
153  static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
154 
155  template <class int_vector>
156  static uint64_t * raw_data(int_vector & v)
157  {
158  return v.m_data;
159  }
160 };
161 
162 // \sa coder::elias_delta::encoding_length
163 template <typename T>
164 inline uint8_t elias_delta<T>::encoding_length(uint64_t w)
165 {
166  uint8_t len_1 = w ? bits::hi(w) : 64;
167  return len_1 + (bits::hi(len_1 + 1) << 1) + 1;
168 }
169 
170 template <typename T>
171 template <class int_vector>
172 inline bool elias_delta<T>::encode(const int_vector & v, int_vector & z)
173 {
174  typedef typename int_vector::size_type size_type;
175  z.width(v.width());
176  size_type z_bit_size = 0;
177  uint64_t w;
178  const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
179  for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
180  {
181  if ((w = *it) == 0) { w = zero_val; }
182  z_bit_size += encoding_length(w);
183  }
184  z.bit_resize(z_bit_size); // Initial size of z
185  z.shrink_to_fit();
186  if (z_bit_size & 0x3F)
187  { // if z_bit_size % 64 != 0
188  *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
189  }
190  z_bit_size = 0;
191  uint64_t * z_data = z.m_data;
192  uint8_t offset = 0;
193  size_type len, len_1_len; // TODO: change to uint8_t and test it
194  for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
195  {
196  w = *it;
197  if (w == 0) { w = zero_val; }
198  // (number of bits to represent w)
199  len = w ? bits::hi(w) + 1 : 65;
200  // (number of bits to represent the length of w) -1
201  len_1_len = bits::hi(len);
202  // Write unary representation for the length of the length of w
203  bits::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len + 1);
204  if (len_1_len)
205  {
206  bits::write_int_and_move(z_data, len, offset, len_1_len);
207  bits::write_int_and_move(z_data, w, offset, len - 1);
208  }
209  }
210  return true;
211 }
212 
213 template <typename T>
214 inline void elias_delta<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
215 {
216  uint8_t len, len_1_len;
217  // (number of bits to represent w)
218  len = x ? bits::hi(x) + 1 : 65;
219  // (number of bits to represent the length of w) - 1
220  len_1_len = bits::hi(len);
221  // Write unary representation for the length of the length of w
222  bits::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len + 1);
223  if (len_1_len)
224  {
225  bits::write_int_and_move(z, len, offset, len_1_len);
226  bits::write_int_and_move(z, x, offset, len - 1);
227  }
228 }
229 
230 template <typename T>
231 inline uint64_t elias_delta<T>::decode_prefix_sum(const uint64_t * d,
232  const size_type start_idx,
233  const size_type end_idx,
234  size_type n)
235 {
236  if (n == 0) return 0;
237  const uint64_t * lastdata = d + ((end_idx + 63) >> 6);
238  d += (start_idx >> 6);
239  uint64_t w = 0, value = 0;
240  int16_t buffered = 0, read = start_idx & 0x3F;
241  size_type i = 0;
242  if (n + read <= 64)
243  {
244  if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n]) return n;
245  }
246  else
247  { // n+read > 64
248  if ((*d >> read) == bits::lo_set[64 - read])
249  { // all bits are set to 1
250  value = 64 - read;
251  ++d;
252  n -= (64 - read);
253  read = 0;
254  while (n >= 64)
255  {
256  if (*d == 0xFFFFFFFFFFFFFFFFULL)
257  {
258  value += 64;
259  ++d;
260  n -= 64;
261  }
262  else
263  goto start_decoding;
264  }
265  // 0 <= n <= 64
266  if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
267  }
268  }
269 
270 start_decoding:
271  while (i < n)
272  {
273  while (buffered < 64 and d < lastdata)
274  {
275  fill_buffer:
276  w |= (((*d) >> read) << buffered);
277  if (read >= buffered)
278  {
279  ++d;
280  buffered += 64 - read;
281  read = 0;
282  }
283  else
284  { // read buffered
285  read += 64 - buffered;
286  buffered = 64;
287  }
288  }
289  /* if(w==0xFFFFFFFFFFFFFFFFULL){
290  * i += 64;
291  * value += 64;
292  * if(i >= n)
293  * return value - (i-n);
294  * buffered = 0;
295  * w = 0;
296  * // continue;
297  * goto fill_buffer;
298  * }
299  */
300  // uint32_t rbp = (w == 0xFFFFFFFFFFFFFFFFULL)?64:bits::lo(~w);
301  uint32_t rbp = bits::lo(~w);
302  if (rbp > 0)
303  {
304  i += rbp;
305  value += rbp;
306  if (i >= n)
307  { // decoded too much
308  return value - (i - n); // return corrected value
309  }
310  assert((int64_t)buffered >= rbp);
311  buffered -= rbp;
312  w >>= rbp;
313  if (buffered < 16)
314  // continue;
315  goto fill_buffer;
316  }
317  // assert(w!=0xFFFFFFFFFFFFFFFFULL);
318  // else
319  {
320  // i < n
321  begin_decode:
322  uint32_t psum = elias_delta<T>::data.prefixsum[w & 0x0000FFFF];
323  if (!psum or i + ((psum >> 16) & 0x00FF) > n)
324  {
325  if (w == 0)
326  { // buffer is not full
327  w |= (((*d) >> read) << buffered);
328  if (read >= buffered)
329  {
330  ++d;
331  buffered += 64 - read;
332  read = 0;
333  }
334  else
335  {
336  read += 64 - buffered;
337  buffered = 64;
338  };
339  if (!w)
340  {
341  w |= (((*d) >> read) << buffered);
342  if (read >= buffered)
343  {
344  ++d;
345  buffered += 64 - read;
346  read = 0;
347  }
348  else
349  {
350  read += 64 - buffered;
351  buffered = 64;
352  };
353  }
354  }
355  // assert(w>0);
356  uint16_t len_1_len = bits::lo(w); // read length of length
357  buffered -= (len_1_len + 1);
358  w >>= (len_1_len + 1);
359  if (len_1_len > buffered)
360  { // buffer is not full
361  w |= (((*d) >> read) << buffered);
362  if (read >= buffered)
363  {
364  ++d;
365  buffered += 64 - read;
366  read = 0;
367  }
368  else
369  {
370  read += 64 - buffered;
371  buffered = 64;
372  };
373  if (len_1_len > buffered)
374  {
375  w |= (((*d) >> read) << buffered);
376  if (read >= buffered)
377  {
378  ++d;
379  buffered += 64 - read;
380  read = 0;
381  }
382  else
383  {
384  read += 64 - buffered;
385  buffered = 64;
386  };
387  }
388  }
389  // assert(len_1_len <= buffered);
390  uint16_t len_1 = (w & bits::lo_set[len_1_len]) + (1ULL << len_1_len) - 1;
391  buffered -= len_1_len;
392  w >>= len_1_len;
393  if (len_1 > buffered)
394  { // buffer is not full
395  w |= (((*d) >> read) << buffered);
396  if (read >= buffered)
397  {
398  ++d;
399  buffered += 64 - read;
400  read = 0;
401  }
402  else
403  {
404  read += 64 - buffered;
405  buffered = 64;
406  };
407  if (len_1 > buffered)
408  {
409  w |= (((*d) >> read) << buffered);
410  if (read >= buffered)
411  {
412  ++d;
413  buffered += 64 - read;
414  read = 0;
415  }
416  else
417  {
418  read += 64 - buffered;
419  buffered = 64;
420  };
421  }
422  }
423  // if( len_1 > buffered ){
424  // std::cerr<<"len_1="<<len_1<<" buffered = "<<buffered<<std::endl;
425  // }
426  // assert(len_1 <= buffered);
427  value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << (len_1));
428  buffered -= len_1;
429  if (len_1 < 64) { w >>= len_1; }
430  else
431  {
432  w = 0;
433  }
434  ++i;
435  if (i == n) return value;
436  if (buffered >= 16) goto begin_decode;
437  }
438  else
439  {
440  value += (psum & 0x0000FFFF);
441  i += ((psum >> 16) & 0x00FF);
442  if (i == n) return value;
443  buffered -= (psum >> 24);
444  w >>= (psum >> 24);
445  if (buffered >= 16) goto begin_decode;
446  }
447  }
448  // std::cerr<<i<<" / "<<n<<std::endl;
449  };
450  // std::cerr<<value<<std::endl;
451  return value;
452 }
453 
454 template <typename T>
455 inline uint64_t elias_delta<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
456 {
457  if (n == 0) return 0;
458  d += (start_idx >> 6);
459  uint64_t value = 0;
460  size_type i = 0;
461  uint8_t offset = start_idx & 0x3F;
462 
463  if (n < 24)
464  {
465  if (n + offset <= 64)
466  {
467  if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n]) return n;
468  }
469  else
470  { // n+offset > 64
471  if ((*d >> offset) == bits::lo_set[64 - offset])
472  { // all bits are set to 1
473  value = 64 - offset;
474  ++d;
475  n -= (64 - offset);
476  offset = 0;
477  while (n >= 64)
478  {
479  if (*d == 0xFFFFFFFFFFFFFFFFULL)
480  {
481  value += 64;
482  ++d;
483  n -= 64;
484  }
485  else
486  {
487  uint8_t temp = bits::lo(~(*d));
488  value += temp;
489  n -= temp;
490  offset = temp;
491  goto start_decoding;
492  }
493  }
494  // 0 <= n <= 64
495  if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
496  }
497  }
498  }
499 
500 start_decoding:
501 
502  while (i < n)
503  { // while not all values are decoded
504  // n-i values to decode
505  if (((*d >> offset) & 0xF) == 0xF)
506  {
507  uint8_t maxdecode = n - i > 63 ? 63 : n - i;
508  uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
509  i += rbp;
510  value += rbp;
511  if (rbp + offset >= 64)
512  {
513  ++d;
514  offset = (rbp + offset) & 0x3F;
515  }
516  else
517  {
518  offset += rbp;
519  }
520  if (rbp == maxdecode) continue;
521  }
522  while (i < n)
523  {
524  uint32_t psum = elias_delta<T>::data.prefixsum[bits::read_int(d, offset, 16)];
525  // if( psum == 0 or i+((psum>>16)&0x00FF) > n ){ // value does not fit in 16 bits
526  if (psum == 0)
527  { // value does not fit in 16 bits
528  goto decode_single;
529  }
530  else if (i + ((psum >> 16) & 0x00FF) > n)
531  { // decoded too much
532  if (n - i <= 8)
533  {
534  psum = elias_delta<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
535  if (psum > 0)
536  {
537  value += (psum & 0xF);
538  i += ((psum >> 4) & 0xF);
539  offset += (psum >> 8);
540  if (offset >= 64)
541  {
542  offset &= 0x3F;
543  ++d;
544  }
545  }
546  }
547  break;
548  }
549  else
550  {
551  value += (psum & 0x0000FFFF);
552  i += ((psum >> 16) & 0x00FF);
553  offset += (psum >> 24);
554  if (offset >= 64)
555  {
556  offset &= 0x3F;
557  ++d;
558  }
559  }
560  }
561  if (i < n)
562  {
563  decode_single:
564  i++;
565  uint16_t len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
566  uint16_t len_1 = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len) - 1;
567  value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << (len_1));
568  // std::cout<<"decode single ("<<len_1_len<<","<<len_1<<","<<value<<")"<<std::endl;
569  }
570  }
571  return value;
572 }
573 
574 template <typename T>
575 template <class int_vector>
576 inline bool elias_delta<T>::decode(const int_vector & z, int_vector & v)
577 {
578  typename int_vector::size_type len_1_len, len, n = 0;
579  const uint64_t * z_data = z.data();
580  const uint64_t * z_end = z.data() + (z.bit_size() >> 6);
581  uint8_t offset = 0;
582  while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
583  {
584  len_1_len = bits::read_unary_and_move(z_data, offset);
585  if (len_1_len)
586  {
587  len = bits::read_int_and_move(z_data, offset, len_1_len) + (1ULL << len_1_len);
588  bits::move_right(z_data, offset, len - 1);
589  }
590  ++n;
591  }
592  v.width(z.width());
593  v.resize(n);
594  v.shrink_to_fit();
595  return decode<false, true>(z.data(), 0, n, v.begin());
596 }
597 
598 template <typename T>
599 template <bool t_sumup, bool t_inc, class t_iter>
600 inline uint64_t elias_delta<T>::decode(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
601 {
602  d += (start_idx >> 6);
603  uint64_t value = 0;
604  size_type i = 0;
605  size_type len_1_len, len;
606  uint8_t offset = start_idx & 0x3F;
607  while (i++ < n)
608  { // while not all values are decoded
609  if (!t_sumup) value = 0;
610  len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
611  if (!len_1_len) { value += 1; }
612  else
613  {
614  len = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len);
615  value += bits::read_int_and_move(d, offset, len - 1) + (len - 1 < 64) * (1ULL << (len - 1));
616  }
617  if (t_inc) *(it++) = value;
618  }
619  return value;
620 }
621 
622 template <typename T>
624 
625 } // end namespace coder
626 } // end namespace sdsl
627 #endif
A class to encode and decode between Elias- and binary code.
static bool encode(const int_vector &v, int_vector &z)
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Elias delta encoded integers beginning at start_idx in the bitstring "data" and return the s...
static struct sdsl::coder::elias_delta::impl data
static const uint8_t min_codeword_length
static uint8_t encoding_length(uint64_t)
static uint64_t * raw_data(int_vector &v)
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
A generic vector class for integers of width .
Definition: int_vector.hpp:253
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:788
int_vector_size_type size_type
Definition: int_vector.hpp:266
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:571
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:530
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:619
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
static SDSL_CONSTEXPR uint64_t read_unary_and_move(const uint64_t *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition: bits.hpp:857
static SDSL_CONSTEXPR uint64_t read_int(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition: bits.hpp:783
static SDSL_CONSTEXPR void move_right(const uint64_t *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition: bits.hpp:892
static SDSL_CONSTEXPR void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:752
static SDSL_CONSTEXPR uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition: bits.hpp:696
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197
static SDSL_CONSTEXPR uint64_t read_unary_bounded(const uint64_t *word, uint8_t offset=0)
Definition: bits.hpp:846
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
static SDSL_CONSTEXPR uint64_t read_int_and_move(const uint64_t *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition: bits.hpp:805
static SDSL_CONSTEXPR uint64_t read_int_bounded(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Definition: bits.hpp:799