SDSL  3.0.0
Succinct Data Structure Library
dac_vector.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef SDSL_DAC_VECTOR
9 #define SDSL_DAC_VECTOR
10 
11 #include <sdsl/int_vector.hpp>
12 #include <sdsl/iterators.hpp>
13 #include <sdsl/rank_support_v5.hpp>
14 
16 namespace sdsl
17 {
18 
20 
44 template <uint8_t t_b = 4, typename t_rank = rank_support_v5<>>
46 {
47  private:
48  static_assert(t_b > 0, "dac_vector: t_b has to be larger than 0");
49  static_assert(t_b < 64, "dac_vector: t_b has to be smaller than 64");
50 
51  public:
55  typedef const value_type const_reference;
58  typedef const pointer const_pointer;
60  typedef ptrdiff_t difference_type;
61  typedef t_rank rank_support_type;
63 
64  private:
65  int_vector<t_b> m_data; // block data for every level
66  bit_vector m_overflow; // mark non-end bytes
67  rank_support_type m_overflow_rank; // rank for m_overflow
68  int_vector<64> m_level_pointer_and_rank = int_vector<64>(4, 0);
69  uint8_t m_max_level; // maximum level < (log n)/b+1
70 
71  public:
72  dac_vector() = default;
73 
75  : m_data(v.m_data)
76  , m_overflow(v.m_overflow)
77  , m_overflow_rank(v.m_overflow_rank)
78  , m_level_pointer_and_rank(v.m_level_pointer_and_rank)
79  , m_max_level(v.m_max_level)
80  {
81  m_overflow_rank.set_vector(&m_overflow);
82  }
83 
84  dac_vector(dac_vector && v) { *this = std::move(v); }
86  {
87  if (this != &v)
88  {
89  dac_vector tmp(v);
90  *this = std::move(tmp);
91  }
92  return *this;
93  }
94 
96  {
97  if (this != &v)
98  {
99  m_data = std::move(v.m_data);
100  m_overflow = std::move(v.m_overflow);
101  m_overflow_rank = std::move(v.m_overflow_rank);
102  m_overflow_rank.set_vector(&m_overflow);
103  m_level_pointer_and_rank = std::move(v.m_level_pointer_and_rank);
104  m_max_level = std::move(v.m_max_level);
105  }
106  return *this;
107  }
108 
110 
113  template <class Container>
114  dac_vector(const Container & c);
115 
117  template <uint8_t int_width>
119 
121  size_type size() const { return m_level_pointer_and_rank[2]; }
123  static size_type max_size() { return int_vector<>::max_size() / 2; }
124 
126  bool empty() const { return 0 == m_level_pointer_and_rank[2]; }
127 
129  const const_iterator begin() const { return const_iterator(this, 0); }
130 
132  const const_iterator end() const { return const_iterator(this, size()); }
133 
136  {
137  uint8_t level = 1;
138  uint8_t offset = t_b;
139  size_type result = m_data[i];
140  const uint64_t * p = m_level_pointer_and_rank.data();
141  uint64_t ppi = (*p) + i;
142  while (level < m_max_level and m_overflow[ppi])
143  {
144  p += 2;
145  ppi = *p + (m_overflow_rank(ppi) - *(p - 1));
146  result |= (m_data[ppi] << (offset));
147  ++level;
148  offset += t_b;
149  }
150  return result;
151  }
152 
154  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
155 
157  void load(std::istream & in)
158  {
159  m_data.load(in);
160  m_overflow.load(in);
161  m_overflow_rank.load(in, &m_overflow);
162  m_level_pointer_and_rank.load(in);
163  read_member(m_max_level, in);
164  }
165 
167  template <typename archive_t>
168  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
169 
170  template <typename archive_t>
171  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
172 
173  bool operator==(const dac_vector & v) const
174  {
175  return m_max_level && v.m_max_level && m_data == v.m_data && m_overflow == v.m_overflow &&
176  m_overflow_rank == v.m_overflow_rank && m_level_pointer_and_rank == v.m_level_pointer_and_rank;
177  }
178 
179  bool operator!=(const dac_vector & v) const { return !(*this == v); }
180 };
181 
182 template <uint8_t t_b, typename t_rank>
183 template <class Container>
185 {
186  // (1) Count for each level, how many blocks are needed for the representation
187  // Running time: \f$ O(n \times \frac{\log n}{b} \f$
188  // Result is sorted in m_level_pointer_and_rank
189  size_type n = c.size(), val = 0;
190  if (n == 0) return;
191  // initialize counter
192  m_level_pointer_and_rank = int_vector<64>(128, 0);
193  m_level_pointer_and_rank[0] = n; // level 0 has n entries
194 
195  uint8_t level_x_2 = 0;
196  uint8_t max_level_x_2 = 4;
197  for (size_type i = 0; i < n; ++i)
198  {
199  val = c[i];
200  val >>= t_b; // shift value b bits to the right
201  level_x_2 = 2;
202  while (val)
203  {
204  // increase counter for current level by 1
205  ++m_level_pointer_and_rank[level_x_2];
206  val >>= t_b; // shift value b bits to the right
207  level_x_2 += 2; // increase level by 1
208  max_level_x_2 = std::max(max_level_x_2, level_x_2);
209  }
210  }
211  m_level_pointer_and_rank.resize(max_level_x_2);
212  // (2) Determine maximum level and prefix sums of level counters
213  m_max_level = 0;
214  size_type sum_blocks = 0, last_block_size = 0;
215  for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
216  {
217  t = sum_blocks;
218  sum_blocks += m_level_pointer_and_rank[i];
219  m_level_pointer_and_rank[i] = t;
220  if (sum_blocks > t)
221  {
222  ++m_max_level;
223  last_block_size = sum_blocks - t;
224  }
225  }
226  m_overflow = bit_vector(sum_blocks - last_block_size, 0);
227  m_data.resize(sum_blocks);
228 
229  assert(last_block_size > 0);
230 
231  // (3) Enter block and overflow data
232  int_vector<64> cnt = m_level_pointer_and_rank;
233  const uint64_t mask = bits::lo_set[t_b];
234 
235  for (size_type i = 0, j = 0; i < n; ++i)
236  {
237  val = c[i];
238  j = cnt[0]++;
239  m_data[j] = val & mask;
240  val >>= t_b; // shift value b bits to the right
241  level_x_2 = 2;
242  while (val)
243  {
244  m_overflow[j] = 1;
245  // increase counter for current level by 1
246  j = cnt[level_x_2]++;
247  m_data[j] = val & mask;
248  val >>= t_b; // shift value b bits to the right
249  level_x_2 += 2; // increase level by 1
250  }
251  }
252 
253  // (4) Initialize rank data structure for m_overflow and precalc rank for
254  // pointers
255  util::init_support(m_overflow_rank, &m_overflow);
256  for (size_type i = 0;
257  2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
258  ++i)
259  {
260  m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
261  }
262 }
263 
264 template <uint8_t t_b, typename t_rank>
265 template <uint8_t int_width>
267 {
268  // (1) Count for each level, how many blocks are needed for the representation
269  // Running time: \f$ O(n \times \frac{\log n}{b} \f$
270  // Result is sorted in m_level_pointer_and_rank
271  size_type n = v_buf.size(), val = 0;
272  if (n == 0) return;
273  // initialize counter
274  m_level_pointer_and_rank = int_vector<64>(128, 0);
275  m_level_pointer_and_rank[0] = n; // level 0 has n entries
276 
277  uint8_t level_x_2 = 0;
278  uint8_t max_level_x_2 = 4;
279  for (size_type i = 0; i < n; ++i)
280  {
281  val = v_buf[i];
282  val >>= t_b; // shift value b bits to the right
283  level_x_2 = 2;
284  while (val)
285  {
286  // increase counter for current level by 1
287  ++m_level_pointer_and_rank[level_x_2];
288  val >>= t_b; // shift value b bits to the right
289  level_x_2 += 2; // increase level by 1
290  max_level_x_2 = std::max(max_level_x_2, level_x_2);
291  }
292  }
293  m_level_pointer_and_rank.resize(max_level_x_2);
294  // (2) Determine maximum level and prefix sums of level counters
295  m_max_level = 0;
296  size_type sum_blocks = 0, last_block_size = 0;
297  for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
298  {
299  t = sum_blocks;
300  sum_blocks += m_level_pointer_and_rank[i];
301  m_level_pointer_and_rank[i] = t;
302  if (sum_blocks > t)
303  {
304  ++m_max_level;
305  last_block_size = sum_blocks - t;
306  }
307  }
308  m_overflow = bit_vector(sum_blocks - last_block_size, 0);
309  m_data.resize(sum_blocks);
310 
311  assert(last_block_size > 0);
312 
313  // (3) Enter block and overflow data
314  int_vector<64> cnt = m_level_pointer_and_rank;
315  const uint64_t mask = bits::lo_set[t_b];
316 
317  for (size_type i = 0, j = 0; i < n; ++i)
318  {
319  val = v_buf[i];
320  j = cnt[0]++;
321  m_data[j] = val & mask;
322  val >>= t_b; // shift value b bits to the right
323  level_x_2 = 2;
324  while (val)
325  {
326  m_overflow[j] = 1;
327  // increase counter for current level by 1
328  j = cnt[level_x_2]++;
329  m_data[j] = val & mask;
330  val >>= t_b; // shift value b bits to the right
331  level_x_2 += 2; // increase level by 1
332  }
333  }
334 
335  // (4) Initialize rank data structure for m_overflow and precalc rank for
336  // pointers
337  util::init_support(m_overflow_rank, &m_overflow);
338  for (size_type i = 0;
339  2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
340  ++i)
341  {
342  m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
343  }
344 }
345 
346 template <uint8_t t_b, typename t_rank>
349  std::string name) const
350 {
351  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
352  size_type written_bytes = 0;
353  written_bytes += m_data.serialize(out, child, "data");
354  written_bytes += m_overflow.serialize(out, child, "overflow");
355  written_bytes += m_overflow_rank.serialize(out, child, "overflow_rank");
356  written_bytes += m_level_pointer_and_rank.serialize(out, child, "level_pointer_and_rank");
357  written_bytes += write_member(m_max_level, out, child, "max_level");
358  structure_tree::add_size(child, written_bytes);
359  return written_bytes;
360 }
361 
362 template <uint8_t t_b, typename t_rank>
363 template <typename archive_t>
365 {
366  ar(CEREAL_NVP(m_max_level));
367  ar(CEREAL_NVP(m_data));
368  ar(CEREAL_NVP(m_overflow));
369  ar(CEREAL_NVP(m_overflow_rank));
370  ar(CEREAL_NVP(m_level_pointer_and_rank));
371 }
372 
373 template <uint8_t t_b, typename t_rank>
374 template <typename archive_t>
376 {
377  ar(CEREAL_NVP(m_max_level));
378  ar(CEREAL_NVP(m_data));
379  ar(CEREAL_NVP(m_overflow));
380  ar(CEREAL_NVP(m_overflow_rank));
381  m_overflow_rank.set_vector(&m_overflow);
382  ar(CEREAL_NVP(m_level_pointer_and_rank));
383 }
384 
385 } // end namespace sdsl
386 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A generic immutable space-saving vector class for unsigned integers.
Definition: dac_vector.hpp:46
bool operator==(const dac_vector &v) const
Definition: dac_vector.hpp:173
bool empty() const
Returns if the dac_vector is empty.
Definition: dac_vector.hpp:126
const value_type const_reference
Definition: dac_vector.hpp:55
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: dac_vector.hpp:364
dac_vector(dac_vector &&v)
Definition: dac_vector.hpp:84
dac_vector()=default
dac_vector & operator=(dac_vector &&v)
Definition: dac_vector.hpp:95
const_iterator iterator
Definition: dac_vector.hpp:54
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the dac_vector to a stream.
Definition: dac_vector.hpp:347
int_vector ::size_type size_type
Definition: dac_vector.hpp:59
t_rank rank_support_type
Definition: dac_vector.hpp:61
const const_iterator end() const
Iterator that points to the position after the last element of the dac_vector.
Definition: dac_vector.hpp:132
static size_type max_size()
Return the largest size that this container can ever have.
Definition: dac_vector.hpp:123
random_access_const_iterator< dac_vector > const_iterator
Definition: dac_vector.hpp:53
dac_vector & operator=(const dac_vector &v)
Definition: dac_vector.hpp:85
const_reference reference
Definition: dac_vector.hpp:56
size_type size() const
The number of elements in the dac_vector.
Definition: dac_vector.hpp:121
bool operator!=(const dac_vector &v) const
Definition: dac_vector.hpp:179
void load(std::istream &in)
Load from a stream.
Definition: dac_vector.hpp:157
int_vector ::value_type value_type
Definition: dac_vector.hpp:48
value_type operator[](size_type i) const
[]-operator
Definition: dac_vector.hpp:135
iv_tag index_category
Definition: dac_vector.hpp:62
const pointer const_pointer
Definition: dac_vector.hpp:58
const const_iterator begin() const
Iterator that points to the first element of the dac_vector.
Definition: dac_vector.hpp:129
ptrdiff_t difference_type
Definition: dac_vector.hpp:60
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: dac_vector.hpp:375
const_reference * pointer
Definition: dac_vector.hpp:57
dac_vector(const dac_vector &v)
Definition: dac_vector.hpp:74
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void load(std::istream &in)
Load the int_vector for a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:566
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
rank_support_v5.hpp contains rank_support_v5.5
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197