SDSL  3.0.0
Succinct Data Structure Library
bit_vector_il.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
9 #ifndef SDSL_BIT_VECTOR_IL
10 #define SDSL_BIT_VECTOR_IL
11 
12 #include <queue>
13 
14 #include <sdsl/int_vector.hpp>
15 #include <sdsl/iterators.hpp>
16 #include <sdsl/util.hpp>
17 
19 namespace sdsl
20 {
21 
22 template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
23 class rank_support_il; // in bit_vector_il
24 
25 template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
26 class select_support_il; // in bit_vector_il
27 
28 template <class T>
29 constexpr bool power_of_two(T x)
30 {
31  return std::is_integral<T>::value and x > 1 and !(x & (x - 1));
32 }
33 
35 
43 template <uint32_t t_bs = 512>
45 {
46  static_assert(t_bs >= 64, "bit_vector_il: blocksize must be be at least 64 bits.");
47  static_assert(power_of_two(t_bs), "bit_vector_il: blocksize must be a power of two.");
48 
49  public:
56 
57  friend class rank_support_il<1, t_bs>;
58  friend class rank_support_il<0, t_bs>;
59  friend class select_support_il<1, t_bs>;
60  friend class select_support_il<0, t_bs>;
61 
66 
67  private:
68  size_type m_size = 0;
69  size_type m_block_num = 0;
70  size_type m_superblocks = 0;
71  size_type m_block_shift = 0;
72  int_vector<64> m_data;
73  int_vector<64> m_rank_samples;
74 
75  // precondition: m_rank_samples.size() <= m_superblocks
76  void init_rank_samples()
77  {
78  uint32_t blockSize_U64 = bits::hi(t_bs >> 6);
79  size_type idx = 0;
80  std::queue<size_type> lbs, rbs;
81  lbs.push(0);
82  rbs.push(m_superblocks);
83  while (!lbs.empty())
84  {
85  size_type lb = lbs.front();
86  lbs.pop();
87  size_type rb = rbs.front();
88  rbs.pop();
89  if (/*lb < rb and*/ idx < m_rank_samples.size())
90  {
91  size_type mid = lb + (rb - lb) / 2; // select mid \in [lb..rb)
92  size_type pos = (mid << blockSize_U64) + mid;
93  m_rank_samples[idx++] = m_data[pos];
94  lbs.push(lb);
95  rbs.push(mid);
96  lbs.push(mid + 1);
97  rbs.push(rb);
98  }
99  }
100  }
101 
102  public:
104  bit_vector_il(const bit_vector_il &) = default;
106  bit_vector_il & operator=(const bit_vector_il &) = default;
108 
110  {
111  m_size = bv.size();
112  /* calculate the number of superblocks */
113  // each block of size > 0 gets suberblock in which we store the cumulative sum up to this block
114  m_superblocks = (m_size + t_bs) / t_bs;
115  m_block_shift = bits::hi(t_bs);
116  /* allocate new data */
117  size_type blocks = (m_size + 64) / 64;
118  size_type mem = blocks + m_superblocks + 1;
119  // ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^
120  // bit vector data | cum. sum data | sum after last block
121  m_data = int_vector<64>(mem);
122  m_block_num = mem;
123 
124  /* assign data and calculate super block values */
125  const uint64_t * bvp = bv.data();
126 
127  size_type j = 0; // 64-bit word counter in the m_data
128  size_type cum_sum = 0;
129  size_type sample_rate = t_bs / 64;
130  for (size_type i = 0, sample_cnt = sample_rate; i < blocks; ++i, ++sample_cnt)
131  {
132  if (sample_cnt == sample_rate)
133  {
134  m_data[j] = cum_sum;
135  sample_cnt = 0;
136  j++;
137  }
138  m_data[j] = bvp[i];
139  cum_sum += bits::cnt(m_data[j]);
140  j++;
141  }
142  m_data[j] = cum_sum; /* last superblock so we can always
143  get num_ones fast */
144  if (m_block_num > 1024 * 64)
145  {
146  // we store at most m_superblocks+1 rank_samples:
147  // we do a cache efficient binary search for the select on X=1024
148  // or X=the smallest power of two smaller than m_superblock
149  m_rank_samples.resize(std::min(1024ULL, 1ULL << bits::hi(m_superblocks)));
150  }
151  init_rank_samples();
152  }
153 
155 
161  {
162  assert(i < m_size);
163  size_type bs = i >> m_block_shift;
164  size_type block = bs + (i >> 6) + 1;
165  return ((m_data[block] >> (i & 63)) & 1ULL);
166  }
167 
169 
176  uint64_t get_int(size_type idx, uint8_t len = 64) const
177  {
178  assert(idx + len - 1 < m_size);
179  size_type bs = idx >> m_block_shift;
180  size_type b_block = bs + (idx >> 6) + 1;
181  bs = (idx + len - 1) >> m_block_shift;
182  size_type e_block = bs + ((idx + len - 1) >> 6) + 1;
183  if (b_block == e_block)
184  { // spans on block
185  return (m_data[b_block] >> (idx & 63)) & bits::lo_set[len];
186  }
187  else
188  { // spans two blocks
189  uint8_t b_len = 64 - (idx & 63);
190  return (m_data[b_block] >> (idx & 63)) | (m_data[e_block] & bits::lo_set[len - b_len]) << b_len;
191  }
192  }
193 
195  size_type size() const { return m_size; }
196 
198  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
199  {
200  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
201  size_type written_bytes = 0;
202  written_bytes += write_member(m_size, out, child, "size");
203  written_bytes += write_member(m_block_num, out, child, "block_num");
204  written_bytes += write_member(m_superblocks, out, child, "superblocks");
205  written_bytes += write_member(m_block_shift, out, child, "block_shift");
206  written_bytes += m_data.serialize(out, child, "data");
207  written_bytes += m_rank_samples.serialize(out, child, "rank_samples");
208  structure_tree::add_size(child, written_bytes);
209  return written_bytes;
210  }
211 
213  void load(std::istream & in)
214  {
215  read_member(m_size, in);
216  read_member(m_block_num, in);
217  read_member(m_superblocks, in);
218  read_member(m_block_shift, in);
219  m_data.load(in);
220  m_rank_samples.load(in);
221  }
222 
223  template <typename archive_t>
224  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
225  {
226  ar(CEREAL_NVP(m_size));
227  ar(CEREAL_NVP(m_block_num));
228  ar(CEREAL_NVP(m_superblocks));
229  ar(CEREAL_NVP(m_block_shift));
230  ar(CEREAL_NVP(m_data));
231  ar(CEREAL_NVP(m_rank_samples));
232  }
233 
234  template <typename archive_t>
235  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
236  {
237  ar(CEREAL_NVP(m_size));
238  ar(CEREAL_NVP(m_block_num));
239  ar(CEREAL_NVP(m_superblocks));
240  ar(CEREAL_NVP(m_block_shift));
241  ar(CEREAL_NVP(m_data));
242  ar(CEREAL_NVP(m_rank_samples));
243  }
244 
245  iterator begin() const { return iterator(this, 0); }
246 
247  iterator end() const { return iterator(this, size()); }
248 
249  bool operator==(const bit_vector_il & v) const { return m_size == v.m_size && m_data == v.m_data; }
250 
251  bool operator!=(const bit_vector_il & v) const { return !(*this == v); }
252 };
253 
254 template <uint8_t t_b, uint32_t t_bs>
256 {
257  static_assert(t_b == 1 or t_b == 0, "rank_support_il only supports bitpatterns 0 or 1.");
258 
259  public:
262  enum
263  {
264  bit_pat = t_b
265  };
266  enum
267  {
268  bit_pat_len = (uint8_t)1
269  };
270 
271  private:
272  const bit_vector_type * m_v;
273  size_type m_block_shift;
274  size_type m_block_mask;
275  size_type m_block_size_U64;
276 
277  inline size_type rank1(size_type i) const
278  {
279  size_type SBlockNum = i >> m_block_shift;
280  size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
281  uint64_t resp = m_v->m_data[SBlockPos];
282  const uint64_t * B = (m_v->m_data.data() + (SBlockPos + 1));
283  uint64_t rem = i & 63;
284  uint64_t bits = (i & m_block_mask) - rem;
285  while (bits)
286  {
287  resp += bits::cnt(*B++);
288  bits -= 64;
289  }
290  resp += bits::cnt(*B & bits::lo_set[rem]);
291  return resp;
292  }
293 
294  inline size_type rank0(size_type i) const
295  {
296  size_type SBlockNum = i >> m_block_shift;
297  size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
298  uint64_t resp = (SBlockNum << m_block_shift) - m_v->m_data[SBlockPos];
299  const uint64_t * B = (m_v->m_data.data() + (SBlockPos + 1));
300  uint64_t rem = i & 63;
301  uint64_t bits = (i & m_block_mask) - rem;
302  while (bits)
303  {
304  resp += bits::cnt(~(*B));
305  B++;
306  bits -= 64;
307  }
308  resp += bits::cnt((~(*B)) & bits::lo_set[rem]);
309  return resp;
310  }
311 
312  public:
313  rank_support_il(const bit_vector_type * v = nullptr)
314  {
315  set_vector(v);
316  m_block_shift = bits::hi(t_bs);
317  m_block_mask = t_bs - 1;
318  m_block_size_U64 = bits::hi(t_bs >> 6);
319  }
320 
323  {
324  if (t_b) return rank1(i);
325  return rank0(i);
326  }
327 
328  size_type operator()(size_type i) const { return rank(i); }
329 
330  size_type size() const { return m_v->size(); }
331 
332  void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
333 
335  {
336  if (this != &rs) { set_vector(rs.m_v); }
337  return *this;
338  }
339 
340  void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
341 
342  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
343  {
344  return serialize_empty_object(out, v, name, this);
345  }
346 
347  template <typename archive_t>
348  void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
349  {}
350 
351  template <typename archive_t>
352  void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
353  {}
354 
355  bool operator==(const rank_support_il & other) const noexcept { return (*m_v == *other.m_v); }
356 
357  bool operator!=(const rank_support_il & other) const noexcept { return !(*this == other); }
358 };
359 
360 template <uint8_t t_b, uint32_t t_bs>
362 {
363  static_assert(t_b == 1 or t_b == 0, "select_support_il only supports bitpatterns 0 or 1.");
364 
365  public:
368  enum
369  {
370  bit_pat = t_b
371  };
372  enum
373  {
374  bit_pat_len = (uint8_t)1
375  };
376 
377  private:
378  const bit_vector_type * m_v;
379  size_type m_superblocks;
380  size_type m_block_shift;
381  size_type m_block_size_U64;
382 
384  size_type select1(size_type i) const
385  {
386  size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
387  size_type res = 0;
388  size_type idx = 0; // index in m_rank_samples
389  /* binary search over super blocks */
390  // invariant: lb==0 or m_data[ pos(lb-1) ] < i
391  // m_data[ pos(rb) ] >= i, initial since i < rank(size())
392  while (lb < rb)
393  {
394  size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
395 #ifndef NOSELCACHE
396  if (idx < m_v->m_rank_samples.size())
397  {
398  if (m_v->m_rank_samples[idx] >= i)
399  {
400  idx = (idx << 1) + 1;
401  rb = mid;
402  }
403  else
404  {
405  idx = (idx << 1) + 2;
406  lb = mid + 1;
407  }
408  }
409  else
410  {
411 #endif
412  size_type pos = (mid << m_block_size_U64) + mid;
413  // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
414  // data blocks to jump superblock position
415  if (m_v->m_data[pos] >= i) { rb = mid; }
416  else
417  {
418  lb = mid + 1;
419  }
420 #ifndef NOSELCACHE
421  }
422 #endif
423  }
424  res = (rb - 1) << m_block_shift;
425  /* iterate in 64 bit steps */
426  const uint64_t * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
427  i -= *w; // subtract the cumulative sum before the superblock
428  ++w; /* step into the data */
429  size_type ones = bits::cnt(*w);
430  while (ones < i)
431  {
432  i -= ones;
433  ++w;
434  ones = bits::cnt(*w);
435  res += 64;
436  }
437  /* handle last word */
438  res += bits::sel(*w, i);
439  return res;
440  }
441 
443  size_type select0(size_type i) const
444  {
445  size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
446  size_type res = 0;
447  size_type idx = 0; // index in m_rank_samples
448  /* binary search over super blocks */
449  // invariant: lb==0 or m_data[ pos(lb-1) ] < i
450  // m_data[ pos(rb) ] >= i, initial since i < rank(size())
451  while (lb < rb)
452  {
453  size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
454 #ifndef NOSELCACHE
455  if (idx < m_v->m_rank_samples.size())
456  {
457  if (((mid << m_block_shift) - m_v->m_rank_samples[idx]) >= i)
458  {
459  idx = (idx << 1) + 1;
460  rb = mid;
461  }
462  else
463  {
464  idx = (idx << 1) + 2;
465  lb = mid + 1;
466  }
467  }
468  else
469  {
470 #endif
471  size_type pos = (mid << m_block_size_U64) + mid;
472  // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
473  // data blocks to jump superblock position
474  if (((mid << m_block_shift) - m_v->m_data[pos]) >= i) { rb = mid; }
475  else
476  {
477  lb = mid + 1;
478  }
479 #ifndef NOSELCACHE
480  }
481 #endif
482  }
483  res = (rb - 1) << m_block_shift;
484 
485  /* iterate in 64 bit steps */
486  const uint64_t * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
487  i = i - (res - *w); // substract the cumulative sum before the superblock
488  ++w; /* step into the data */
489  size_type zeros = bits::cnt(~*w);
490  while (zeros < i)
491  {
492  i -= zeros;
493  ++w;
494  zeros = bits::cnt(~*w);
495  res += 64;
496  }
497  /* handle last word */
498  res += bits::sel(~*w, i);
499  return res;
500  }
501 
502  public:
503  select_support_il(const bit_vector_type * v = nullptr)
504  {
505  set_vector(v);
506  m_block_shift = bits::hi(t_bs);
507  m_block_size_U64 = bits::hi(t_bs >> 6);
508  }
509 
512  {
513  if (t_b) return select1(i);
514  return select0(i);
515  }
516 
517  size_type operator()(size_type i) const { return select(i); }
518 
519  size_type size() const { return m_v->size(); }
520 
521  void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
522 
524  {
525  if (this != &rs) { set_vector(rs.m_v); }
526  return *this;
527  }
528 
529  void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
530 
531  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
532  {
533  return serialize_empty_object(out, v, name, this);
534  }
535 
536  template <typename archive_t>
537  void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
538  {}
539 
540  template <typename archive_t>
541  void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
542  {}
543 
544  bool operator==(const select_support_il & other) const noexcept { return (*m_v == *other.m_v); }
545 
546  bool operator!=(const select_support_il & other) const noexcept { return !(*this == other); }
547 };
548 
549 } // end namespace sdsl
550 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A bit vector which interleaves the original bit_vector with rank information.
bit_vector_il(bit_vector_il &&)=default
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void load(std::istream &in)
Loads the data structure from the given istream.
bit_vector_il(const bit_vector_il &)=default
bit_vector::difference_type difference_type
value_type operator[](size_type i) const
Accessing the i-th element of the original bit_vector.
uint64_t get_int(size_type idx, uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
bit_vector_il & operator=(bit_vector_il &&)=default
select_support_il< 1, t_bs > select_1_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
bool operator==(const bit_vector_il &v) const
bool operator!=(const bit_vector_il &v) const
size_type size() const
Returns the size of the original bit vector.
bit_vector_il & operator=(const bit_vector_il &)=default
select_support_il< 0, t_bs > select_0_type
rank_support_il< 1, t_bs > rank_1_type
random_access_const_iterator< bit_vector_il > iterator
iterator begin() const
bit_vector_il(const bit_vector &bv)
bit_vector::size_type size_type
iterator end() const
rank_support_il< 0, t_bs > rank_0_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
int_vector_size_type size_type
Definition: int_vector.hpp:266
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
Generic iterator for a random access container.
Definition: iterators.hpp:24
void set_vector(const bit_vector_type *v=nullptr)
rank_support_il & operator=(const rank_support_il &rs)
bool operator==(const rank_support_il &other) const noexcept
rank_support_il(const bit_vector_type *v=nullptr)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &, const bit_vector_type *v=nullptr)
size_type rank(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
size_type operator()(size_type i) const
size_type size() const
bit_vector_il< t_bs > bit_vector_type
bool operator!=(const rank_support_il &other) const noexcept
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
size_type select(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
bool operator!=(const select_support_il &other) const noexcept
size_type operator()(size_type i) const
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
bool operator==(const select_support_il &other) const noexcept
size_type size() const
select_support_il(const bit_vector_type *v=nullptr)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector_il< t_bs > bit_vector_type
void load(std::istream &, const bit_vector_type *v=nullptr)
bit_vector::size_type size_type
select_support_il & operator=(const select_support_il &rs)
void set_vector(const bit_vector_type *v=nullptr)
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
bits_impl<> bits
Definition: bits.hpp:972
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
constexpr bool power_of_two(T x)
size_t serialize_empty_object(std::ostream &, structure_tree_node *v=nullptr, std::string name="", const T *t=nullptr)
Definition: io.hpp:325
A helper class for bitwise tricks on 64 bit words.
Definition: bits.hpp:56
static SDSL_CONSTEXPR uint32_t sel(uint64_t x, uint32_t i)
Calculate the position of the i-th rightmost 1 bit in the 64bit integer x.
Definition: bits.hpp:594
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
static SDSL_CONSTEXPR uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:494
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.