SDSL  3.0.0
Succinct Data Structure Library
wm_int.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
9 #ifndef INCLUDED_SDSL_WM_INT
10 #define INCLUDED_SDSL_WM_INT
11 
12 #include <algorithm> // for std::swap
13 #include <map> // for mapping a symbol to its lexicographical index
14 #include <queue>
15 #include <set> // for calculating the alphabet size
16 #include <stdexcept>
17 #include <utility>
18 #include <vector>
19 
20 #include <sdsl/int_vector.hpp>
21 #include <sdsl/rank_support_v.hpp>
22 #include <sdsl/sdsl_concepts.hpp>
24 #include <sdsl/util.hpp>
25 #include <sdsl/wt_helper.hpp>
26 
28 namespace sdsl
29 {
30 
32 
47 template <class t_bitvector = bit_vector,
48  class t_rank = typename t_bitvector::rank_1_type,
49  class t_select = typename t_bitvector::select_1_type,
50  class t_select_zero = typename t_bitvector::select_0_type>
51 class wm_int
52 {
53  public:
56  typedef typename t_bitvector::difference_type difference_type;
59  typedef t_bitvector bit_vector_type;
60  typedef t_rank rank_1_type;
61  typedef t_select select_1_type;
62  typedef t_select_zero select_0_type;
65  enum
66  {
67  lex_ordered = 0
68  };
69 
70  typedef std::pair<value_type, size_type> point_type;
71  typedef std::vector<point_type> point_vec_type;
72  typedef std::pair<size_type, point_vec_type> r2d_res_type;
73 
74  struct node_type;
75 
76  protected:
78  size_type m_sigma = 0; //<- \f$ |\Sigma| \f$
79  bit_vector_type m_tree; // bit vector to store the wavelet tree
80  rank_1_type m_tree_rank; // rank support for the wavelet tree bit vector
81  select_1_type m_tree_select1; // select support for the wavelet tree bit vector
83  uint32_t m_max_level = 0;
84  int_vector<64> m_zero_cnt; // m_zero_cnt[i] contains the number of zeros in level i
85  int_vector<64> m_rank_level; // m_rank_level[i] contains m_tree_rank(i*size())
86 
87  public:
88  const size_type & sigma = m_sigma;
90  const uint32_t & max_level = m_max_level;
91 
93  wm_int() = default;
94 
96 
104  template <typename t_it>
105  wm_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name(""))
106  : m_size(std::distance(begin, end))
107  {
108  if (0 == m_size) return;
109  m_sigma = 0;
110 
111  value_type max_elem = 1; // variable for the biggest value in rac
112  for (auto it = begin; it != end; ++it)
113  {
114  value_type value = *it;
115  if (value > max_elem) max_elem = value;
116  }
117  m_max_level = bits::hi(max_elem) + 1;
118  std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree");
119  {
121  std::copy(begin, end, rac.begin());
122 
123  // buffer for elements in the right node
124  std::string zero_buf_file_name = tmp_file(tmp_dir, "_zero_buf");
125  osfstream tree_out_buf(tree_out_buf_file_name,
126  std::ios::binary | std::ios::trunc | std::ios::out); // open buffer for tree
127  size_type bit_size = m_size * m_max_level;
128  int_vector<1>::write_header(bit_size, 1, tree_out_buf); // write bv header
129 
130  size_type tree_pos = 0;
131  uint64_t tree_word = 0;
132 
133  m_zero_cnt = int_vector<64>(m_max_level, 0); // zeros at level i
134 
135  for (uint32_t k = 0; k < m_max_level; ++k)
136  {
137  uint8_t width = m_max_level - k - 1;
138  const uint64_t mask = 1ULL << width;
139  uint64_t x = 0;
140  size_type zeros = 0;
141  int_vector_buffer<> zero_buf(zero_buf_file_name, std::ios::out, 1024 * 1024, m_max_level);
142  for (size_t i = 0; i < m_size; ++i)
143  {
144  x = rac[i];
145  if (x & mask)
146  {
147  tree_word |= (1ULL << (tree_pos & 0x3FULL));
148  zero_buf.push_back(x);
149  }
150  else
151  {
152  rac[zeros++] = x;
153  }
154  ++tree_pos;
155  if ((tree_pos & 0x3FULL) == 0)
156  { // if tree_pos % 64 == 0 write old word
157  tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
158  tree_word = 0;
159  }
160  }
161  m_zero_cnt[k] = zeros;
162  for (size_t i = zeros; i < m_size; ++i) { rac[i] = zero_buf[i - zeros]; }
163  }
164  if ((tree_pos & 0x3FULL) != 0)
165  { // if tree_pos % 64 > 0 => there are remaining entries we have to write
166  tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
167  }
168  sdsl::remove(zero_buf_file_name);
169  tree_out_buf.close();
170  m_sigma = std::unique(rac.begin(), rac.end()) - rac.begin();
171  }
173  load_from_file(tree, tree_out_buf_file_name);
174  sdsl::remove(tree_out_buf_file_name);
175  m_tree = bit_vector_type(std::move(tree));
176  util::init_support(m_tree_rank, &m_tree);
177  util::init_support(m_tree_select0, &m_tree);
178  util::init_support(m_tree_select1, &m_tree);
180  for (uint32_t k = 0; k < m_rank_level.size(); ++k) { m_rank_level[k] = m_tree_rank(k * m_size); }
181  }
182 
184  wm_int(const wm_int & wt)
185  : m_size(wt.m_size)
186  , m_sigma(wt.m_sigma)
187  , m_tree(wt.m_tree)
192  , m_zero_cnt(wt.m_zero_cnt)
194  {
195  m_tree_rank.set_vector(&m_tree);
196  m_tree_select1.set_vector(&m_tree);
197  m_tree_select0.set_vector(&m_tree);
198  }
199 
201  wm_int(wm_int && wt)
202  : m_size(wt.m_size)
203  , m_sigma(wt.m_sigma)
204  , m_tree(std::move(wt.m_tree))
205  , m_tree_rank(std::move(wt.m_tree_rank))
206  , m_tree_select1(std::move(wt.m_tree_select1))
207  , m_tree_select0(std::move(wt.m_tree_select0))
209  , m_zero_cnt(wt.m_zero_cnt)
211  {
212  m_tree_rank.set_vector(&m_tree);
213  m_tree_select1.set_vector(&m_tree);
214  m_tree_select0.set_vector(&m_tree);
215  }
216 
218  wm_int & operator=(const wm_int & wt)
219  {
220  if (this != &wt)
221  {
222  m_size = wt.m_size;
223  m_sigma = wt.m_sigma;
224  m_tree = wt.m_tree;
226  m_tree_rank.set_vector(&m_tree);
228  m_tree_select1.set_vector(&m_tree);
230  m_tree_select0.set_vector(&m_tree);
232  m_zero_cnt = wt.m_zero_cnt;
234  }
235  return *this;
236  }
237 
240  {
241  if (this != &wt)
242  {
243  m_size = wt.m_size;
244  m_sigma = wt.m_sigma;
245  m_tree = std::move(wt.m_tree);
246  m_tree_rank = std::move(wt.m_tree_rank);
247  m_tree_rank.set_vector(&m_tree);
248  m_tree_select1 = std::move(wt.m_tree_select1);
249  m_tree_select1.set_vector(&m_tree);
250  m_tree_select0 = std::move(wt.m_tree_select0);
251  m_tree_select0.set_vector(&m_tree);
252  m_max_level = std::move(wt.m_max_level);
253  m_zero_cnt = std::move(wt.m_zero_cnt);
254  m_rank_level = std::move(wt.m_rank_level);
255  }
256  return *this;
257  }
258 
260  size_type size() const { return m_size; }
261 
263  bool empty() const { return m_size == 0; }
264 
266 
272  {
273  assert(i < size());
274  value_type res = 0;
275  for (uint32_t k = 0; k < m_max_level; ++k)
276  {
277  res <<= 1;
278  size_type rank_ones = m_tree_rank(i) - m_rank_level[k];
279  if (m_tree[i])
280  { // one at position i => follow right child
281  i = (k + 1) * m_size + m_zero_cnt[k] + rank_ones;
282  res |= 1;
283  }
284  else
285  { // zero at position i => follow left child
286  auto rank_zeros = (i - k * m_size) - rank_ones;
287  i = (k + 1) * m_size + rank_zeros;
288  }
289  }
290  return res;
291  };
292 
294 
304  {
305  assert(i <= size());
306  if (((1ULL) << (m_max_level)) <= c)
307  { // c is greater than any symbol in wt
308  return 0;
309  }
310  size_type b = 0; // start position of the interval
311  uint64_t mask = (1ULL) << (m_max_level - 1);
312  for (uint32_t k = 0; k < m_max_level and i; ++k)
313  {
314  size_type rank_b = m_tree_rank(b);
315  size_type ones = m_tree_rank(b + i) - rank_b; // ones in [b..i)
316  size_type ones_p = rank_b - m_rank_level[k]; // ones in [level_b..b)
317  if (c & mask)
318  { // search for a one at this level
319  i = ones;
320  b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
321  }
322  else
323  { // search for a zero at this level
324  i = i - ones;
325  b = (k + 1) * m_size + (b - k * m_size - ones_p);
326  }
327  mask >>= 1;
328  }
329  return i;
330  };
331 
333 
339  std::pair<size_type, value_type> inverse_select(size_type i) const
340  {
341  assert(i < size());
342  value_type c = 0;
343  size_type b = 0; // start position of the interval
344  uint64_t mask = (1ULL) << (m_max_level - 1);
345  for (uint32_t k = 0; k < m_max_level; ++k)
346  {
347  size_type rank_b = m_tree_rank(b);
348  size_type ones = m_tree_rank(b + i) - rank_b; // ones in [b..i)
349  size_type ones_p = rank_b - m_rank_level[k]; // ones in [level_b..b)
350  c <<= 1;
351  if (m_tree[b + i])
352  { // go to the right child
353  i = ones;
354  b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
355  c |= 1;
356  }
357  else
358  { // go to the left child
359  i = i - ones;
360  b = (k + 1) * m_size + (b - k * m_size - ones_p);
361  }
362  mask >>= 1;
363  }
364  return std::make_pair(i, c);
365  }
366 
368 
377  {
378  assert(1 <= i and i <= rank(size(), c));
379  uint64_t mask = 1ULL << (m_max_level - 1);
380  int_vector<64> m_path_off(max_level + 1);
381  int_vector<64> m_path_rank_off(max_level + 1);
382  m_path_off[0] = m_path_rank_off[0] = 0;
383  size_type b = 0; // start position of the interval
384  size_type r = i;
385  for (uint32_t k = 0; k < m_max_level and i; ++k)
386  {
387  size_type rank_b = m_tree_rank(b);
388  size_type ones = m_tree_rank(b + r) - rank_b; // ones in [b..i)
389  size_type ones_p = rank_b - m_rank_level[k]; // ones in [0..b)
390  if (c & mask)
391  { // search for a one at this level
392  r = ones;
393  b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
394  }
395  else
396  { // search for a zero at this level
397  r = r - ones;
398  b = (k + 1) * m_size + (b - k * m_size - ones_p);
399  }
400  mask >>= 1;
401  m_path_off[k + 1] = b;
402  m_path_rank_off[k] = rank_b;
403  }
404  mask = 1ULL;
405  for (uint32_t k = m_max_level; k > 0; --k)
406  {
407  b = m_path_off[k - 1];
408  size_type rank_b = m_path_rank_off[k - 1];
409  if (c & mask)
410  { // right child => search i'th one
411  i = m_tree_select1(rank_b + i) - b + 1;
412  }
413  else
414  { // left child => search i'th zero
415  i = m_tree_select0(b - rank_b + i) - b + 1;
416  }
417  mask <<= 1;
418  }
419  return i - 1;
420  };
421 
423 
431  std::pair<size_type, std::vector<std::pair<value_type, size_type>>> range_search_2d(size_type lb,
432  size_type rb,
433  value_type vlb,
434  value_type vrb,
435  bool report = true) const
436  {
437 
438  if (vrb > (1ULL << m_max_level)) vrb = (1ULL << m_max_level);
439  if (vlb > vrb) return make_pair(0, point_vec_type());
440  size_type cnt_answers = 0;
441  point_vec_type point_vec;
442  if (lb <= rb)
443  {
444  std::vector<size_type> is(m_max_level + 1);
445  std::vector<size_type> rank_off(m_max_level + 1);
446  _range_search_2d(root(), { { lb, rb } }, vlb, vrb, 0, is, rank_off, point_vec, report, cnt_answers);
447  }
448  return make_pair(cnt_answers, point_vec);
449  }
450 
452  range_type r,
453  value_type vlb,
454  value_type vrb,
455  size_type ilb,
456  std::vector<size_type> & is,
457  std::vector<size_type> & rank_off,
458  point_vec_type & point_vec,
459  bool report,
460  size_type & cnt_answers) const
461  {
462  using std::get;
463  if (get<0>(r) > get<1>(r)) return;
464  is[v.level] = v.offset + get<0>(r);
465 
466  if (v.level == m_max_level)
467  {
468  for (size_type j = 1; j <= sdsl::size(r) and report; ++j)
469  {
470  size_type i = j;
471  size_type c = v.sym;
472  for (uint32_t k = m_max_level; k > 0; --k)
473  {
474  size_type offset = is[k - 1];
475  size_type rank_offset = rank_off[k - 1];
476  if (c & 1) { i = m_tree_select1(rank_offset + i) - offset + 1; }
477  else
478  {
479  i = m_tree_select0(offset - rank_offset + i) - offset + 1;
480  }
481  c >>= 1;
482  }
483  point_vec.emplace_back(is[0] + i - 1, v.sym);
484  }
485  cnt_answers += sdsl::size(r);
486  return;
487  }
488  else
489  {
490  rank_off[v.level] = m_tree_rank(is[v.level]);
491  }
492  size_type irb = ilb + (1ULL << (m_max_level - v.level));
493  size_type mid = (irb + ilb) >> 1;
494 
495  auto c_v = expand(v);
496  auto c_r = expand(v, r);
497 
498  if (!sdsl::empty(get<0>(c_r)) and vlb < mid and mid)
499  {
500  _range_search_2d(get<0>(c_v),
501  get<0>(c_r),
502  vlb,
503  std::min(vrb, mid - 1),
504  ilb,
505  is,
506  rank_off,
507  point_vec,
508  report,
509  cnt_answers);
510  }
511  if (!sdsl::empty(get<1>(c_r)) and vrb >= mid)
512  {
513  _range_search_2d(get<1>(c_v),
514  get<1>(c_r),
515  std::max(mid, vlb),
516  vrb,
517  mid,
518  is,
519  rank_off,
520  point_vec,
521  report,
522  cnt_answers);
523  }
524  }
525 
527  const_iterator begin() const { return const_iterator(this, 0); }
528 
530  const_iterator end() const { return const_iterator(this, size()); }
531 
533  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
534  {
535  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
536  size_type written_bytes = 0;
537  written_bytes += write_member(m_size, out, child, "size");
538  written_bytes += write_member(m_sigma, out, child, "sigma");
539  written_bytes += m_tree.serialize(out, child, "tree");
540  written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
541  written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
542  written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
543  written_bytes += write_member(m_max_level, out, child, "max_level");
544  written_bytes += m_zero_cnt.serialize(out, child, "zero_cnt");
545  written_bytes += m_rank_level.serialize(out, child, "rank_level");
546  structure_tree::add_size(child, written_bytes);
547  return written_bytes;
548  }
549 
551  void load(std::istream & in)
552  {
553  read_member(m_size, in);
554  read_member(m_sigma, in);
555  m_tree.load(in);
556  m_tree_rank.load(in, &m_tree);
557  m_tree_select1.load(in, &m_tree);
558  m_tree_select0.load(in, &m_tree);
560  m_zero_cnt.load(in);
561  m_rank_level.load(in);
562  }
563 
565  template <typename archive_t>
566  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
567  {
568  ar(CEREAL_NVP(m_size));
569  ar(CEREAL_NVP(m_sigma));
570  ar(CEREAL_NVP(m_max_level));
571  ar(CEREAL_NVP(m_tree));
572  ar(CEREAL_NVP(m_tree_rank));
575  ar(CEREAL_NVP(m_zero_cnt));
577  }
578 
580  template <typename archive_t>
581  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
582  {
583  ar(CEREAL_NVP(m_size));
584  ar(CEREAL_NVP(m_sigma));
585  ar(CEREAL_NVP(m_max_level));
586  ar(CEREAL_NVP(m_tree));
587  ar(CEREAL_NVP(m_tree_rank));
588  m_tree_rank.set_vector(&m_tree);
590  m_tree_select1.set_vector(&m_tree);
592  m_tree_select0.set_vector(&m_tree);
593  ar(CEREAL_NVP(m_zero_cnt));
595  }
596 
598  bool operator==(wm_int const & other) const noexcept
599  {
600  return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_max_level == other.m_max_level) &&
601  (m_tree == other.m_tree) && (m_tree_rank == other.m_tree_rank) &&
602  (m_tree_select1 == other.m_tree_select1) && (m_tree_select0 == other.m_tree_select0) &&
603  (m_zero_cnt == other.m_zero_cnt) && (m_rank_level == other.m_rank_level);
604  }
605 
607  bool operator!=(wm_int const & other) const noexcept { return !(*this == other); }
608 
610  struct node_type
611  {
616 
617  // Default constructor
618  node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0)
619  : offset(o)
620  , size(sz)
621  , level(l)
622  , sym(sy)
623  {}
624 
625  // Copy constructor
626  node_type(const node_type &) = default;
627 
628  // Move copy constructor
629  node_type(node_type &&) = default;
630 
631  // Assignment operator
632  node_type & operator=(const node_type &) = default;
633 
634  // Move assignment operator
635  node_type & operator=(node_type &&) = default;
636 
637  // Comparator operator
638  bool operator==(const node_type & v) const { return offset == v.offset; }
639 
640  // Smaller operator
641  bool operator<(const node_type & v) const { return offset < v.offset; }
642 
643  // Greater operator
644  bool operator>(const node_type & v) const { return offset > v.offset; }
645  };
646 
648  bool is_leaf(const node_type & v) const { return v.level == m_max_level; }
649 
651  value_type sym(const node_type & v) const { return v.sym; }
652 
655  {
657  }
658 
660  auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
661  {
662  return random_access_container<std::function<value_type(size_type)>>(
663  [&v, this](size_type i) {
664  node_type vv = v;
665  while (!is_leaf(vv))
666  {
667  auto vs = expand(vv);
668  auto rs = expand(vv, { 0, i });
669  bool bit = *(begin(vv) + i);
670  i = std::get<1>(rs[bit]);
671  vv = vs[bit];
672  }
673  return sym(vv);
674  },
675  size(v));
676  }
677 
679  bool empty(const node_type & v) const { return v.size == (size_type)0; }
680 
682  auto size(const node_type & v) const -> decltype(v.size) { return v.size; }
683 
685  node_type root() const { return node_type(0, m_size, 0, 0); }
686 
688 
692  std::array<node_type, 2> expand(const node_type & v) const
693  {
694  node_type v_right = v;
695  return expand(std::move(v_right));
696  }
697 
699 
703  std::array<node_type, 2> expand(node_type && v) const
704  {
705  node_type v_left;
706  size_type rank_b = m_tree_rank(v.offset);
707  size_type ones = m_tree_rank(v.offset + v.size) - rank_b; // ones in [b..size)
708  size_type ones_p = rank_b - m_rank_level[v.level]; // ones in [level_b..b)
709 
710  v_left.offset = (v.level + 1) * m_size + (v.offset - v.level * m_size) - ones_p;
711  v_left.size = v.size - ones;
712  v_left.level = v.level + 1;
713  v_left.sym = v.sym << 1;
714 
715  v.offset = (v.level + 1) * m_size + m_zero_cnt[v.level] + ones_p;
716  v.size = ones;
717  v.level = v.level + 1;
718  v.sym = (v.sym << 1) | 1;
719 
720  return { { std::move(v_left), v } };
721  }
722 
724 
733  std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
734  {
735  auto ranges_copy = ranges;
736  return expand(v, std::move(ranges_copy));
737  }
738 
740 
749  std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
750  {
751  auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
752  range_vec_type res(ranges.size());
753  size_t i = 0;
754  for (auto & r : ranges)
755  {
756  auto sp_rank = m_tree_rank(v.offset + r[0]);
757  auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
758  auto left_size = (r[1] - r[0] + 1) - right_size;
759 
760  auto right_sp = sp_rank - v_sp_rank;
761  auto left_sp = r[0] - right_sp;
762 
763  r = { { left_sp, left_sp + left_size - 1 } };
764  res[i++] = { { right_sp, right_sp + right_size - 1 } };
765  }
766  return { { ranges, std::move(res) } };
767  }
768 
770 
779  std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
780  {
781  auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
782  auto sp_rank = m_tree_rank(v.offset + r[0]);
783  auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
784  auto left_size = (r[1] - r[0] + 1) - right_size;
785 
786  auto right_sp = sp_rank - v_sp_rank;
787  auto left_sp = r[0] - right_sp;
788 
789  return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
790  }
791 
793  std::pair<uint64_t, uint64_t> path(value_type c) const { return { m_max_level, c }; }
794 
795  private:
797  auto begin(const node_type & v) const -> decltype(m_tree.begin() + v.offset) { return m_tree.begin() + v.offset; }
798 
800  auto end(const node_type & v) const -> decltype(m_tree.begin() + v.offset + v.size)
801  {
802  return m_tree.begin() + v.offset + v.size;
803  }
804 };
805 
806 } // end namespace sdsl
807 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:788
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:830
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
void close()
Close the stream.
Definition: sfstream.hpp:87
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A wavelet tree class for integer sequences.
Definition: wm_int.hpp:52
uint32_t m_max_level
Definition: wm_int.hpp:83
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wm_int.hpp:530
t_bitvector::difference_type difference_type
Definition: wm_int.hpp:56
wm_int(const wm_int &wt)
Copy constructor.
Definition: wm_int.hpp:184
std::pair< size_type, point_vec_type > r2d_res_type
Definition: wm_int.hpp:72
const_iterator iterator
Definition: wm_int.hpp:58
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wm_int.hpp:551
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wm_int.hpp:749
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wm_int.hpp:271
void _range_search_2d(node_type v, range_type r, value_type vlb, value_type vrb, size_type ilb, std::vector< size_type > &is, std::vector< size_type > &rank_off, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
Definition: wm_int.hpp:451
int_alphabet_tag alphabet_category
Definition: wm_int.hpp:64
const size_type & sigma
Effective alphabet size of the wavelet tree.
Definition: wm_int.hpp:88
t_rank rank_1_type
Definition: wm_int.hpp:60
t_bitvector bit_vector_type
Definition: wm_int.hpp:59
value_type sym(const node_type &v) const
Symbol of leaf node v.
Definition: wm_int.hpp:651
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wm_int.hpp:733
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
Definition: wm_int.hpp:581
rank_1_type m_tree_rank
Definition: wm_int.hpp:80
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)>>
Random access container to sequence of node v.
Definition: wm_int.hpp:660
size_type m_size
Definition: wm_int.hpp:77
wt_tag index_category
Definition: wm_int.hpp:63
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wm_int.hpp:692
size_type size() const
Returns the size of the original vector.
Definition: wm_int.hpp:260
int_vector< 64 > m_rank_level
Definition: wm_int.hpp:85
bool operator==(wm_int const &other) const noexcept
Equality operator.
Definition: wm_int.hpp:598
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
Definition: wm_int.hpp:431
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: wm_int.hpp:566
wm_int & operator=(const wm_int &wt)
Assignment operator.
Definition: wm_int.hpp:218
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wm_int.hpp:527
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wm_int.hpp:263
wm_int()=default
Default constructor.
int_vector< 64 > m_zero_cnt
Definition: wm_int.hpp:84
size_type m_sigma
Definition: wm_int.hpp:78
wm_int & operator=(wm_int &&wt)
Assignment move operator.
Definition: wm_int.hpp:239
int_vector ::value_type value_type
Definition: wm_int.hpp:55
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wm_int.hpp:654
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
Definition: wm_int.hpp:339
std::vector< point_type > point_vec_type
Definition: wm_int.hpp:71
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wm_int.hpp:533
std::pair< value_type, size_type > point_type
Definition: wm_int.hpp:70
random_access_const_iterator< wm_int > const_iterator
Definition: wm_int.hpp:57
bool operator!=(wm_int const &other) const noexcept
Inequality operator.
Definition: wm_int.hpp:607
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wm_int.hpp:779
t_select_zero select_0_type
Definition: wm_int.hpp:62
node_type root() const
Return the root node.
Definition: wm_int.hpp:685
t_select select_1_type
Definition: wm_int.hpp:61
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
Definition: wm_int.hpp:303
bit_vector_type m_tree
Definition: wm_int.hpp:79
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
Definition: wm_int.hpp:89
select_1_type m_tree_select1
Definition: wm_int.hpp:81
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition: wm_int.hpp:376
select_0_type m_tree_select0
Definition: wm_int.hpp:82
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
Definition: wm_int.hpp:703
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wm_int.hpp:648
const uint32_t & max_level
Maximal level of the wavelet tree.
Definition: wm_int.hpp:90
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wm_int.hpp:793
int_vector ::size_type size_type
Definition: wm_int.hpp:54
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wm_int.hpp:679
wm_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
Definition: wm_int.hpp:105
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
Definition: wm_int.hpp:682
wm_int(wm_int &&wt)
Move copy constructor.
Definition: wm_int.hpp:201
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
Definition: ram_fs.hpp:174
bool empty(const range_type &r)
Empty range check.
Definition: wt_helper.hpp:782
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
Represents a node in the wavelet tree.
Definition: wm_int.hpp:611
node_type & operator=(node_type &&)=default
node_type(const node_type &)=default
bool operator==(const node_type &v) const
Definition: wm_int.hpp:638
bool operator>(const node_type &v) const
Definition: wm_int.hpp:644
node_type(node_type &&)=default
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
Definition: wm_int.hpp:618
node_type & operator=(const node_type &)=default
bool operator<(const node_type &v) const
Definition: wm_int.hpp:641
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.