SDSL  3.0.0
Succinct Data Structure Library
wt_int.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
10 #ifndef INCLUDED_SDSL_INT_WAVELET_TREE
11 #define INCLUDED_SDSL_INT_WAVELET_TREE
12 
13 #include <algorithm> // for std::swap
14 #include <map> // for mapping a symbol to its lexicographical index
15 #include <queue>
16 #include <set> // for calculating the alphabet size
17 #include <stdexcept>
18 #include <utility>
19 #include <vector>
20 
21 #include <sdsl/int_vector.hpp>
22 #include <sdsl/rank_support_v.hpp>
23 #include <sdsl/sdsl_concepts.hpp>
25 #include <sdsl/util.hpp>
26 #include <sdsl/wt_helper.hpp>
27 
29 namespace sdsl
30 {
31 
33 
44 template <class t_bitvector = bit_vector,
45  class t_rank = typename t_bitvector::rank_1_type,
46  class t_select = typename t_bitvector::select_1_type,
47  class t_select_zero = typename t_bitvector::select_0_type>
48 class wt_int
49 {
50  public:
53  typedef typename t_bitvector::difference_type difference_type;
56  typedef t_bitvector bit_vector_type;
57  typedef t_rank rank_1_type;
58  typedef t_select select_1_type;
59  typedef t_select_zero select_0_type;
62  enum
63  {
64  lex_ordered = 1
65  };
66 
67  typedef std::pair<value_type, size_type> point_type;
68  typedef std::vector<point_type> point_vec_type;
69  typedef std::pair<size_type, point_vec_type> r2d_res_type;
70 
71  protected:
73  size_type m_sigma = 0; //<- \f$ |\Sigma| \f$
74  bit_vector_type m_tree; // bit vector to store the wavelet tree
75  rank_1_type m_tree_rank; // rank support for the wavelet tree bit vector
76  select_1_type m_tree_select1; // select support for the wavelet tree bit vector
78  uint32_t m_max_level = 0;
79 
80  private:
81  // recursive internal version of the method interval_symbols
82  void _interval_symbols(size_type i,
83  size_type j,
84  size_type & k,
85  std::vector<value_type> & cs,
86  std::vector<size_type> & rank_c_i,
87  std::vector<size_type> & rank_c_j,
88  size_type level,
90  size_type node_size,
91  size_type offset) const
92  {
93  // invariant: j>i
94 
95  if (level >= m_max_level)
96  {
97  rank_c_i[k] = i;
98  rank_c_j[k] = j;
99  cs[k++] = path;
100  return;
101  }
102 
103  size_type ones_before_o = m_tree_rank(offset);
104  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
105  size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
106  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
107 
108  // goto left child
109  if ((j - i) - (ones_before_j - ones_before_i) > 0)
110  {
111  size_type new_offset = offset + m_size;
112  size_type new_node_size = node_size - ones_before_end;
113  size_type new_i = i - ones_before_i;
114  size_type new_j = j - ones_before_j;
115  _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1, path << 1, new_node_size, new_offset);
116  }
117 
118  // goto right child
119  if ((ones_before_j - ones_before_i) > 0)
120  {
121  size_type new_offset = offset + (node_size - ones_before_end) + m_size;
122  size_type new_node_size = ones_before_end;
123  size_type new_i = ones_before_i;
124  size_type new_j = ones_before_j;
125  _interval_symbols(new_i,
126  new_j,
127  k,
128  cs,
129  rank_c_i,
130  rank_c_j,
131  level + 1,
132  (path << 1) | 1,
133  new_node_size,
134  new_offset);
135  }
136  }
137 
138  public:
139  const size_type & sigma = m_sigma;
141  const uint32_t & max_level = m_max_level;
142 
144  wt_int() = default;
145 
147 
155  template <typename t_it>
156  wt_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name(""))
157  : m_size(std::distance(begin, end))
158  {
159  if (0 == m_size) return;
160  m_sigma = 0;
161 
162  value_type max_elem = 1; // variable for the biggest value in rac
163  for (auto it = begin; it != end; ++it)
164  {
165  value_type value = *it;
166  if (value > max_elem) max_elem = value;
167  }
168  m_max_level = bits::hi(max_elem) + 1;
169  std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree");
170  {
172  std::copy(begin, end, rac.begin());
173 
174  // buffer for elements in the right node
175  int_vector_buffer<> buf1(tmp_file(tmp_dir, "_wt_constr_buf"), std::ios::out, 10 * (1 << 20), m_max_level);
176  osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out);
177 
178  size_type bit_size = m_size * m_max_level;
179  int_vector<1>::write_header(bit_size, 1, tree_out_buf); // write bv header
180 
181  size_type tree_pos = 0;
182  uint64_t tree_word = 0;
183 
184  uint64_t mask_old = 1ULL << (m_max_level);
185  for (uint32_t k = 0; k < m_max_level; ++k)
186  {
187  size_type start = 0;
188  const uint64_t mask_new = 1ULL << (m_max_level - k - 1);
189  do {
190  size_type i = start;
191  size_type cnt0 = 0;
192  size_type cnt1 = 0;
193  uint64_t start_value = (rac[i] & mask_old);
194  uint64_t x;
195  while (i < m_size and ((x = rac[i]) & mask_old) == start_value)
196  {
197  if (x & mask_new)
198  {
199  tree_word |= (1ULL << (tree_pos & 0x3FULL));
200  buf1[cnt1++] = x;
201  }
202  else
203  {
204  rac[start + cnt0++] = x;
205  }
206  ++tree_pos;
207  if ((tree_pos & 0x3FULL) == 0)
208  { // if tree_pos % 64 == 0 write old word
209  tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
210  tree_word = 0;
211  }
212  ++i;
213  }
214  if (k + 1 < m_max_level)
215  { // inner node
216  for (size_type j = 0; j < cnt1; ++j) { rac[start + cnt0 + j] = buf1[j]; }
217  }
218  else
219  { // leaf node
220  m_sigma += (cnt0 > 0) + (cnt1 > 0); // increase sigma for each leaf
221  }
222  start += cnt0 + cnt1;
223  } while (start < m_size);
224  mask_old += mask_new;
225  }
226  if ((tree_pos & 0x3FULL) != 0)
227  { // if tree_pos % 64 > 0 => there are remaining entries we have to write
228  tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
229  }
230  buf1.close(true); // remove temporary file
231  tree_out_buf.close();
232  } // destruct rac
233 
235  load_from_file(tree, tree_out_buf_file_name);
236  sdsl::remove(tree_out_buf_file_name);
237  m_tree = bit_vector_type(std::move(tree));
238  util::init_support(m_tree_rank, &m_tree);
239  util::init_support(m_tree_select0, &m_tree);
240  util::init_support(m_tree_select1, &m_tree);
241  }
242 
244  wt_int(const wt_int & wt)
245  {
246  m_size = wt.m_size;
247  m_sigma = wt.m_sigma;
248  m_tree = wt.m_tree;
250  m_tree_rank.set_vector(&m_tree);
252  m_tree_select1.set_vector(&m_tree);
254  m_tree_select0.set_vector(&m_tree);
256  }
257 
259  wt_int(wt_int && wt)
260  : m_size(wt.m_size)
261  , m_sigma(wt.m_sigma)
262  , m_tree(std::move(wt.m_tree))
263  , m_tree_rank(std::move(wt.m_tree_rank))
264  , m_tree_select1(std::move(wt.m_tree_select1))
265  , m_tree_select0(std::move(wt.m_tree_select0))
267  {
268  m_tree_rank.set_vector(&m_tree);
269  m_tree_select1.set_vector(&m_tree);
270  m_tree_select0.set_vector(&m_tree);
271  }
272 
274  wt_int & operator=(const wt_int & wt)
275  {
276  if (this != &wt)
277  {
278  wt_int tmp(wt); // re-use copy-constructor
279  *this = std::move(tmp); // re-use move-assignment
280  }
281  return *this;
282  }
283 
286  {
287  if (this != &wt)
288  {
289  m_size = wt.m_size;
290  m_sigma = wt.m_sigma;
291  m_tree = std::move(wt.m_tree);
292  m_tree_rank = std::move(wt.m_tree_rank);
293  m_tree_rank.set_vector(&m_tree);
294  m_tree_select1 = std::move(wt.m_tree_select1);
295  m_tree_select1.set_vector(&m_tree);
296  m_tree_select0 = std::move(wt.m_tree_select0);
297  m_tree_select0.set_vector(&m_tree);
298  m_max_level = std::move(wt.m_max_level);
299  }
300  return *this;
301  }
302 
304  size_type size() const { return m_size; }
305 
307  bool empty() const { return m_size == 0; }
308 
310 
316  {
317  assert(i < size());
318  size_type offset = 0;
319  value_type res = 0;
320  size_type node_size = m_size;
321  for (uint32_t k = 0; k < m_max_level; ++k)
322  {
323  res <<= 1;
324  size_type ones_before_o = m_tree_rank(offset);
325  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
326  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
327  if (m_tree[offset + i])
328  { // one at position i => follow right child
329  offset += (node_size - ones_before_end);
330  node_size = ones_before_end;
331  i = ones_before_i;
332  res |= 1;
333  }
334  else
335  { // zero at position i => follow left child
336  node_size = (node_size - ones_before_end);
337  i = (i - ones_before_i);
338  }
339  offset += m_size;
340  }
341  return res;
342  };
343 
345 
355  {
356  assert(i <= size());
357  if (((1ULL) << (m_max_level)) <= c)
358  { // c is greater than any symbol in wt
359  return 0;
360  }
361  size_type offset = 0;
362  uint64_t mask = (1ULL) << (m_max_level - 1);
363  size_type node_size = m_size;
364  for (uint32_t k = 0; k < m_max_level and i; ++k)
365  {
366  size_type ones_before_o = m_tree_rank(offset);
367  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
368  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
369  if (c & mask)
370  { // search for a one at this level
371  offset += (node_size - ones_before_end);
372  node_size = ones_before_end;
373  i = ones_before_i;
374  }
375  else
376  { // search for a zero at this level
377  node_size = (node_size - ones_before_end);
378  i = (i - ones_before_i);
379  }
380  offset += m_size;
381  mask >>= 1;
382  }
383  return i;
384  };
385 
387 
393  std::pair<size_type, value_type> inverse_select(size_type i) const
394  {
395  assert(i < size());
396 
397  value_type c = 0;
398  size_type node_size = m_size, offset = 0;
399  for (uint32_t k = 0; k < m_max_level; ++k)
400  {
401  size_type ones_before_o = m_tree_rank(offset);
402  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
403  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
404  c <<= 1;
405  if (m_tree[offset + i])
406  { // go to the right child
407  offset += (node_size - ones_before_end);
408  node_size = ones_before_end;
409  i = ones_before_i;
410  c |= 1;
411  }
412  else
413  { // go to the left child
414  node_size = (node_size - ones_before_end);
415  i = (i - ones_before_i);
416  }
417  offset += m_size;
418  }
419  return std::make_pair(i, c);
420  }
421 
423 
432  {
433  assert(1 <= i and i <= rank(size(), c));
434  // possible optimization: if the array is a permutation we can start at the bottom of the tree
435  size_type offset = 0;
436  uint64_t mask = (1ULL) << (m_max_level - 1);
437  size_type node_size = m_size;
438  int_vector<64> m_path_off(max_level + 1);
439  int_vector<64> m_path_rank_off(max_level + 1);
440  m_path_off[0] = m_path_rank_off[0] = 0;
441 
442  for (uint32_t k = 0; k < m_max_level and node_size; ++k)
443  {
444  size_type ones_before_o = m_tree_rank(offset);
445  m_path_rank_off[k] = ones_before_o;
446  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
447  if (c & mask)
448  { // search for a one at this level
449  offset += (node_size - ones_before_end);
450  node_size = ones_before_end;
451  }
452  else
453  { // search for a zero at this level
454  node_size = (node_size - ones_before_end);
455  }
456  offset += m_size;
457  m_path_off[k + 1] = offset;
458  mask >>= 1;
459  }
460  if (0ULL == node_size or node_size < i)
461  {
462  throw std::logic_error("select(" + util::to_string(i) + "," + util::to_string(c) +
463  "): c does not occur i times in the WT");
464  return m_size;
465  }
466  mask = 1ULL;
467  for (uint32_t k = m_max_level; k > 0; --k)
468  {
469  offset = m_path_off[k - 1];
470  size_type ones_before_o = m_path_rank_off[k - 1];
471  if (c & mask)
472  { // right child => search i'th
473  i = m_tree_select1(ones_before_o + i) - offset + 1;
474  }
475  else
476  { // left child => search i'th zero
477  i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
478  }
479  mask <<= 1;
480  }
481  return i - 1;
482  };
483 
485 
506  size_type j,
507  size_type & k,
508  std::vector<value_type> & cs,
509  std::vector<size_type> & rank_c_i,
510  std::vector<size_type> & rank_c_j) const
511  {
512  assert(i <= j and j <= size());
513  k = 0;
514  if (i == j) { return; }
515  if ((i + 1) == j)
516  {
517  auto res = inverse_select(i);
518  cs[0] = res.second;
519  rank_c_i[0] = res.first;
520  rank_c_j[0] = res.first + 1;
521  k = 1;
522  return;
523  }
524 
525  _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0, m_size, 0);
526  }
527 
529 
541  template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
542  t_ret_type lex_count(size_type i, size_type j, value_type c) const
543  {
544  assert(i <= j and j <= size());
545  if (((1ULL) << (m_max_level)) <= c)
546  { // c is greater than any symbol in wt
547  return t_ret_type{ 0, j - i, 0 };
548  }
549  size_type offset = 0;
550  size_type smaller = 0;
551  size_type greater = 0;
552  uint64_t mask = (1ULL) << (m_max_level - 1);
553  size_type node_size = m_size;
554  for (uint32_t k = 0; k < m_max_level; ++k)
555  {
556  size_type ones_before_o = m_tree_rank(offset);
557  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
558  size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
559  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
560  if (c & mask)
561  { // search for a one at this level
562  offset += (node_size - ones_before_end);
563  node_size = ones_before_end;
564  smaller += j - i - ones_before_j + ones_before_i;
565  i = ones_before_i;
566  j = ones_before_j;
567  }
568  else
569  { // search for a zero at this level
570  node_size -= ones_before_end;
571  greater += ones_before_j - ones_before_i;
572  i -= ones_before_i;
573  j -= ones_before_j;
574  }
575  offset += m_size;
576  mask >>= 1;
577  }
578  return t_ret_type{ i, smaller, greater };
579  }
580 
582 
591  template <class t_ret_type = std::tuple<size_type, size_type>>
592  t_ret_type lex_smaller_count(size_type i, value_type c) const
593  {
594  assert(i <= size());
595  if (((1ULL) << (m_max_level)) <= c)
596  { // c is greater than any symbol in wt
597  return t_ret_type{ 0, i };
598  }
599  size_type offset = 0;
600  size_type result = 0;
601  uint64_t mask = (1ULL) << (m_max_level - 1);
602  size_type node_size = m_size;
603  for (uint32_t k = 0; k < m_max_level and i; ++k)
604  {
605  size_type ones_before_o = m_tree_rank(offset);
606  size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
607  size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
608  if (c & mask)
609  { // search for a one at this level
610  offset += (node_size - ones_before_end);
611  node_size = ones_before_end;
612  result += i - ones_before_i;
613  i = ones_before_i;
614  }
615  else
616  { // search for a zero at this level
617  node_size = (node_size - ones_before_end);
618  i -= ones_before_i;
619  }
620  offset += m_size;
621  mask >>= 1;
622  }
623  return t_ret_type{ i, result };
624  }
625 
627 
635  std::pair<size_type, std::vector<std::pair<value_type, size_type>>> range_search_2d(size_type lb,
636  size_type rb,
637  value_type vlb,
638  value_type vrb,
639  bool report = true) const
640  {
641  std::vector<size_type> offsets(m_max_level + 1);
642  std::vector<size_type> ones_before_os(m_max_level + 1);
643  offsets[0] = 0;
644  if (vrb > (1ULL << m_max_level)) vrb = (1ULL << m_max_level);
645  if (vlb > vrb) return make_pair(0, point_vec_type());
646  size_type cnt_answers = 0;
647  point_vec_type point_vec;
648  _range_search_2d(lb, rb, vlb, vrb, 0, 0, m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers);
649  return make_pair(cnt_answers, point_vec);
650  }
651 
653  size_type rb,
654  value_type vlb,
655  value_type vrb,
656  size_type level,
657  size_type ilb,
658  size_type node_size,
659  std::vector<size_type> & offsets,
660  std::vector<size_type> & ones_before_os,
661  size_type path,
662  point_vec_type & point_vec,
663  bool report,
664  size_type & cnt_answers) const
665  {
666  if (lb > rb) return;
667  if (level == m_max_level)
668  {
669  if (report)
670  {
671  for (size_type j = lb + 1; j <= rb + 1; ++j)
672  {
673  size_type i = j;
674  size_type c = path;
675  for (uint32_t k = m_max_level; k > 0; --k)
676  {
677  size_type offset = offsets[k - 1];
678  size_type ones_before_o = ones_before_os[k - 1];
679  if (c & 1) { i = m_tree_select1(ones_before_o + i) - offset + 1; }
680  else
681  {
682  i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
683  }
684  c >>= 1;
685  }
686  point_vec.emplace_back(i - 1, path);
687  }
688  }
689  cnt_answers += rb - lb + 1;
690  return;
691  }
692  size_type irb = ilb + (1ULL << (m_max_level - level));
693  size_type mid = (irb + ilb) >> 1;
694 
695  size_type offset = offsets[level];
696 
697  size_type ones_before_o = m_tree_rank(offset);
698  ones_before_os[level] = ones_before_o;
699  size_type ones_before_lb = m_tree_rank(offset + lb);
700  size_type ones_before_rb = m_tree_rank(offset + rb + 1);
701  size_type ones_before_end = m_tree_rank(offset + node_size);
702  size_type zeros_before_o = offset - ones_before_o;
703  size_type zeros_before_lb = offset + lb - ones_before_lb;
704  size_type zeros_before_rb = offset + rb + 1 - ones_before_rb;
705  size_type zeros_before_end = offset + node_size - ones_before_end;
706  if (vlb < mid and mid)
707  {
708  size_type nlb = zeros_before_lb - zeros_before_o;
709  size_type nrb = zeros_before_rb - zeros_before_o;
710  offsets[level + 1] = offset + m_size;
711  if (nrb)
712  _range_search_2d(nlb,
713  nrb - 1,
714  vlb,
715  std::min(vrb, mid - 1),
716  level + 1,
717  ilb,
718  zeros_before_end - zeros_before_o,
719  offsets,
720  ones_before_os,
721  path << 1,
722  point_vec,
723  report,
724  cnt_answers);
725  }
726  if (vrb >= mid)
727  {
728  size_type nlb = ones_before_lb - ones_before_o;
729  size_type nrb = ones_before_rb - ones_before_o;
730  offsets[level + 1] = offset + m_size + (zeros_before_end - zeros_before_o);
731  if (nrb)
732  _range_search_2d(nlb,
733  nrb - 1,
734  std::max(mid, vlb),
735  vrb,
736  level + 1,
737  mid,
738  ones_before_end - ones_before_o,
739  offsets,
740  ones_before_os,
741  (path << 1) + 1,
742  point_vec,
743  report,
744  cnt_answers);
745  }
746  }
747 
749  const_iterator begin() const { return const_iterator(this, 0); }
750 
752  const_iterator end() const { return const_iterator(this, size()); }
753 
755  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
756  {
757  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
758  size_type written_bytes = 0;
759  written_bytes += write_member(m_size, out, child, "size");
760  written_bytes += write_member(m_sigma, out, child, "sigma");
761  written_bytes += m_tree.serialize(out, child, "tree");
762  written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
763  written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
764  written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
765  written_bytes += write_member(m_max_level, out, child, "max_level");
766  structure_tree::add_size(child, written_bytes);
767  return written_bytes;
768  }
769 
771  void load(std::istream & in)
772  {
773  read_member(m_size, in);
774  read_member(m_sigma, in);
775  m_tree.load(in);
776  m_tree_rank.load(in, &m_tree);
777  m_tree_select1.load(in, &m_tree);
778  m_tree_select0.load(in, &m_tree);
780  }
781 
782  template <typename archive_t>
783  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
784  {
785  ar(CEREAL_NVP(m_size));
786  ar(CEREAL_NVP(m_sigma));
787  ar(CEREAL_NVP(m_max_level));
788  ar(CEREAL_NVP(m_tree));
789  ar(CEREAL_NVP(m_tree_rank));
792  }
793 
794  template <typename archive_t>
795  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
796  {
797  ar(CEREAL_NVP(m_size));
798  ar(CEREAL_NVP(m_sigma));
799  ar(CEREAL_NVP(m_max_level));
800  ar(CEREAL_NVP(m_tree));
801  ar(CEREAL_NVP(m_tree_rank));
802  m_tree_rank.set_vector(&m_tree);
804  m_tree_select1.set_vector(&m_tree);
806  m_tree_select0.set_vector(&m_tree);
807  }
808 
810  bool operator==(wt_int const & other) const noexcept
811  {
812  return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_tree == other.m_tree) &&
813  (m_tree_rank == other.m_tree_rank) && (m_tree_select1 == other.m_tree_select1) &&
814  (m_tree_select0 == other.m_tree_select0) && (m_max_level == other.m_max_level);
815  }
816 
818  bool operator!=(wt_int const & other) const noexcept { return !(*this == other); }
819 
821  struct node_type
822  {
827 
828  // Default constructor
829  node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0)
830  : offset(o)
831  , size(sz)
832  , level(l)
833  , sym(sy)
834  {}
835 
836  // Copy constructor
837  node_type(const node_type &) = default;
838 
839  // Move copy constructor
840  node_type(node_type &&) = default;
841 
842  // Assignment operator
843  node_type & operator=(const node_type &) = default;
844 
845  // Move assignment operator
846  node_type & operator=(node_type &&) = default;
847 
848  // Comparator operator
849  bool operator==(const node_type & v) const { return offset == v.offset; }
850 
851  // Smaller operator
852  bool operator<(const node_type & v) const { return offset < v.offset; }
853 
854  // Greater operator
855  bool operator>(const node_type & v) const { return offset > v.offset; }
856  };
857 
859  bool is_leaf(const node_type & v) const { return v.level == m_max_level; }
860 
862  value_type sym(const node_type & v) const { return v.sym; }
863 
866  {
868  }
869 
871  auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
872  {
873  return random_access_container<std::function<value_type(size_type)>>(
874  [&v, this](size_type i) {
875  node_type vv = v;
876  while (!is_leaf(vv))
877  {
878  auto vs = expand(vv);
879  auto rs = expand(vv, range_type{ { 0, i } });
880  bool bit = *(begin(vv) + i);
881  i = std::get<1>(rs[bit]);
882  vv = vs[bit];
883  }
884  return sym(vv);
885  },
886  size(v));
887  }
888 
890  bool empty(const node_type & v) const { return v.size == (size_type)0; }
891 
893  auto size(const node_type & v) const -> decltype(v.size) { return v.size; }
894 
896  node_type root() const { return node_type(0, m_size, 0, 0); }
897 
899 
903  std::array<node_type, 2> expand(const node_type & v) const
904  {
905  node_type v_right = v;
906  return expand(std::move(v_right));
907  }
908 
910 
914  std::array<node_type, 2> expand(node_type && v) const
915  {
916  node_type v_left;
917  size_type offset_rank = m_tree_rank(v.offset);
918  size_type ones = m_tree_rank(v.offset + v.size) - offset_rank;
919 
920  v_left.offset = v.offset + m_size;
921  v_left.size = v.size - ones;
922  v_left.level = v.level + 1;
923  v_left.sym = v.sym << 1;
924 
925  v.offset = v.offset + m_size + v_left.size;
926  v.size = ones;
927  v.level = v.level + 1;
928  v.sym = (v.sym << 1) | 1;
929 
930  return { { std::move(v_left), v } };
931  }
932 
934 
943  std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
944  {
945  auto ranges_copy = ranges;
946  return expand(v, std::move(ranges_copy));
947  }
948 
950 
959  std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
960  {
961  auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
962  range_vec_type res(ranges.size());
963  size_t i = 0;
964  for (auto & r : ranges)
965  {
966  auto sp_rank = m_tree_rank(v.offset + r[0]);
967  auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
968  auto left_size = (r[1] - r[0] + 1) - right_size;
969 
970  auto right_sp = sp_rank - v_sp_rank;
971  auto left_sp = r[0] - right_sp;
972 
973  r = { { left_sp, left_sp + left_size - 1 } };
974  res[i++] = { { right_sp, right_sp + right_size - 1 } };
975  }
976  return { { ranges, std::move(res) } };
977  }
978 
980 
989  std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
990  {
991  auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
992  auto sp_rank = m_tree_rank(v.offset + r[0]);
993  auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
994  auto left_size = (r[1] - r[0] + 1) - right_size;
995 
996  auto right_sp = sp_rank - v_sp_rank;
997  auto left_sp = r[0] - right_sp;
998 
999  return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
1000  }
1001 
1003  std::pair<uint64_t, uint64_t> path(value_type c) const { return { m_max_level, c }; }
1004 
1005  private:
1007  auto begin(const node_type & v) const -> decltype(m_tree.begin() + v.offset) { return m_tree.begin() + v.offset; }
1008 
1010  auto end(const node_type & v) const -> decltype(m_tree.begin() + v.offset + v.size)
1011  {
1012  return m_tree.begin() + v.offset + v.size;
1013  }
1014 };
1015 
1016 } // end namespace sdsl
1017 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
void close(bool remove_file=false)
Close the int_vector_buffer.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:830
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
void close()
Close the stream.
Definition: sfstream.hpp:87
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A wavelet tree class for integer sequences.
Definition: wt_int.hpp:49
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wt_int.hpp:1003
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wt_int.hpp:315
wt_tag index_category
Definition: wt_int.hpp:60
bool operator!=(wt_int const &other) const noexcept
Inequality operator.
Definition: wt_int.hpp:818
bool operator==(wt_int const &other) const noexcept
Equality operator.
Definition: wt_int.hpp:810
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wt_int.hpp:989
select_0_type m_tree_select0
Definition: wt_int.hpp:77
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wt_int.hpp:865
node_type root() const
Return the root node.
Definition: wt_int.hpp:896
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
Definition: wt_int.hpp:140
wt_int & operator=(wt_int &&wt)
Assignment move operator.
Definition: wt_int.hpp:285
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_int.hpp:783
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_int.hpp:755
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)>>
Random access container to sequence of node v.
Definition: wt_int.hpp:871
size_type size() const
Returns the size of the original vector.
Definition: wt_int.hpp:304
std::pair< size_type, point_vec_type > r2d_res_type
Definition: wt_int.hpp:69
const size_type & sigma
Effective alphabet size of the wavelet tree.
Definition: wt_int.hpp:139
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wt_int.hpp:959
t_select_zero select_0_type
Definition: wt_int.hpp:59
uint32_t m_max_level
Definition: wt_int.hpp:78
std::pair< value_type, size_type > point_type
Definition: wt_int.hpp:67
wt_int()=default
Default constructor.
value_type sym(const node_type &v) const
Returns the symbol of leaf node v.
Definition: wt_int.hpp:862
size_type m_size
Definition: wt_int.hpp:72
int_vector ::value_type value_type
Definition: wt_int.hpp:52
const uint32_t & max_level
Maximal level of the wavelet tree.
Definition: wt_int.hpp:141
const_iterator iterator
Definition: wt_int.hpp:55
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wt_int.hpp:943
t_rank rank_1_type
Definition: wt_int.hpp:57
t_bitvector bit_vector_type
Definition: wt_int.hpp:56
rank_1_type m_tree_rank
Definition: wt_int.hpp:75
size_type m_sigma
Definition: wt_int.hpp:73
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition: wt_int.hpp:505
wt_int(const wt_int &wt)
Copy constructor.
Definition: wt_int.hpp:244
random_access_const_iterator< wt_int > const_iterator
Definition: wt_int.hpp:54
wt_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
Definition: wt_int.hpp:156
int_alphabet_tag alphabet_category
Definition: wt_int.hpp:61
wt_int(wt_int &&wt)
Move constructor.
Definition: wt_int.hpp:259
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
Definition: wt_int.hpp:635
std::vector< point_type > point_vec_type
Definition: wt_int.hpp:68
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition: wt_int.hpp:431
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
Definition: wt_int.hpp:914
int_vector ::size_type size_type
Definition: wt_int.hpp:51
t_select select_1_type
Definition: wt_int.hpp:58
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wt_int.hpp:890
void _range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, size_type level, size_type ilb, size_type node_size, std::vector< size_type > &offsets, std::vector< size_type > &ones_before_os, size_type path, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
Definition: wt_int.hpp:652
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_int.hpp:795
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
Definition: wt_int.hpp:354
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wt_int.hpp:752
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wt_int.hpp:903
t_bitvector::difference_type difference_type
Definition: wt_int.hpp:53
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wt_int.hpp:749
bit_vector_type m_tree
Definition: wt_int.hpp:74
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wt_int.hpp:859
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_int.hpp:771
wt_int & operator=(const wt_int &wt)
Assignment operator.
Definition: wt_int.hpp:274
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
Definition: wt_int.hpp:893
t_ret_type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition: wt_int.hpp:542
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
Definition: wt_int.hpp:393
t_ret_type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition: wt_int.hpp:592
select_1_type m_tree_select1
Definition: wt_int.hpp:76
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wt_int.hpp:307
int_vector.hpp contains the sdsl::int_vector class.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
Definition: ram_fs.hpp:174
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
Represents a node in the wavelet tree.
Definition: wt_int.hpp:822
bool operator>(const node_type &v) const
Definition: wt_int.hpp:855
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
Definition: wt_int.hpp:829
node_type(node_type &&)=default
node_type(const node_type &)=default
node_type & operator=(const node_type &)=default
bool operator==(const node_type &v) const
Definition: wt_int.hpp:849
bool operator<(const node_type &v) const
Definition: wt_int.hpp:852
node_type & operator=(node_type &&)=default
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.