SDSL  3.0.0
Succinct Data Structure Library
wt_helper.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 #ifndef INCLUDED_SDSL_WT_HELPER
5 #define INCLUDED_SDSL_WT_HELPER
6 
7 #include <algorithm>
8 #include <array>
9 #include <deque>
10 #include <limits>
11 #include <queue>
12 #include <utility>
13 #include <vector>
14 
15 #include <sdsl/int_vector.hpp>
16 
17 namespace sdsl
18 {
19 
21 typedef std::vector<range_type> range_vec_type;
22 
24 
27 bool empty(const range_type & r);
28 
30 
34 
36 
39 template <typename t_it, typename t_rac>
40 void calculate_character_occurences(t_it begin, t_it end, t_rac & C)
41 {
42  C = t_rac();
43  for (auto it = begin; it != end; ++it)
44  {
45  uint64_t c = *it;
46  if (c >= C.size()) { C.resize(c + 1, 0); }
47  ++C[c];
48  }
49 }
50 
51 template <typename t_rac, typename sigma_type>
52 void calculate_effective_alphabet_size(const t_rac & C, sigma_type & sigma)
53 {
54  sigma = std::count_if(begin(C), end(C), [](decltype(*begin(C)) & x) { return x > 0; });
55 }
56 
57 struct pc_node
58 {
59  uint64_t freq; // frequency of symbol sym
60  uint64_t sym; // symbol
61  uint64_t parent; // pointer to the parent
62  uint64_t child[2]; // pointer to the children
63 
64  enum : uint64_t
65  {
66  undef = 0xFFFFFFFFFFFFFFFFULL
67  }; // max uint64_t value
68 
69  pc_node(uint64_t freq = 0,
70  uint64_t sym = 0,
71  uint64_t parent = undef,
72  uint64_t child_left = undef,
73  uint64_t child_right = undef);
74 };
75 
76 template <typename t_tree_strat_fat>
77 struct _node
78 {
79  using node_type = typename t_tree_strat_fat::node_type;
80  typedef uint64_t size_type;
81  uint64_t bv_pos = 0; // pointer into the bit_vector, which represents the wavelet tree
82  uint64_t bv_pos_rank = 0; // pre-calculated rank for the prefix up to but not including bv_pos
83  node_type parent = t_tree_strat_fat::undef; // pointer to the parent
84  node_type child[2] = { t_tree_strat_fat::undef, t_tree_strat_fat::undef }; // pointer to the children
85 
86  _node(uint64_t bv_pos = 0,
87  uint64_t bv_pos_rank = 0,
88  node_type parent = t_tree_strat_fat::undef,
89  node_type child_left = t_tree_strat_fat::undef,
90  node_type child_right = t_tree_strat_fat::undef)
91  : bv_pos(bv_pos)
93  , parent(parent)
94  {
95  child[0] = child_left;
96  child[1] = child_right;
97  }
98 
99  _node(const _node &) = default;
100 
101  _node & operator=(const _node & v)
102  {
103  if (this != &v)
104  {
105  bv_pos = v.bv_pos;
107  parent = v.parent;
108  child[0] = v.child[0];
109  child[1] = v.child[1];
110  }
111  return *this;
112  }
113 
114  _node & operator=(const pc_node & v)
115  {
116  bv_pos = v.freq;
117  bv_pos_rank = v.sym;
118  parent = v.parent;
119  child[0] = v.child[0];
120  child[1] = v.child[1];
121  return *this;
122  }
123 
124  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
125  {
126  structure_tree_node * st_child = structure_tree::add_child(v, name, util::class_name(*this));
127  uint64_t written_bytes = 0;
128  written_bytes += write_member(bv_pos, out);
129  written_bytes += write_member(bv_pos_rank, out);
130  written_bytes += write_member(parent, out);
131  out.write((char *)child, 2 * sizeof(child[0]));
132  written_bytes += 2 * sizeof(child[0]);
133  structure_tree::add_size(st_child, written_bytes);
134  return written_bytes;
135  }
136 
137  void load(std::istream & in)
138  {
139  read_member(bv_pos, in);
141  read_member(parent, in);
142  in.read((char *)child, 2 * sizeof(child[0]));
143  }
144 
145  template <typename archive_t>
146  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
147  {
148  ar(CEREAL_NVP(bv_pos));
149  ar(CEREAL_NVP(bv_pos_rank));
150  ar(CEREAL_NVP(parent));
151  ar(CEREAL_NVP(child[0]));
152  ar(CEREAL_NVP(child[1]));
153  }
154 
155  template <typename archive_t>
156  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
157  {
158  ar(CEREAL_NVP(bv_pos));
159  ar(CEREAL_NVP(bv_pos_rank));
160  ar(CEREAL_NVP(parent));
161  ar(CEREAL_NVP(child[0]));
162  ar(CEREAL_NVP(child[1]));
163  }
164 
166  bool operator==(_node const & other) const noexcept
167  {
168  return (bv_pos == other.bv_pos) && (bv_pos_rank == other.bv_pos_rank) && (parent == other.parent) &&
169  (child[0] == other.child[0]) && (child[1] == other.child[1]);
170  }
171 
173  bool operator!=(_node const & other) const noexcept { return !(*this == other); }
174 };
175 
176 // TODO: version of _byte_tree for lex_ordered tree shapes
177 // m_c_to_leaf can be compressed and
178 // m_path is only needed for sigma chars
179 
180 // Strategy class for tree representation of a WT
181 template <bool t_dfs_shape, typename t_wt>
183 {
185  using value_type = uint8_t;
186  using node_type = uint16_t; // node is represented by index in m_nodes
188  enum : uint16_t
189  {
190  undef = 0xFFFF
191  }; // max uint16_t value
192  enum : uint32_t
193  {
194  fixed_sigma = 256
195  };
196  enum : uint8_t
197  {
198  int_width = 8
199  }; // width of the input integers
200 
201  std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
202  node_type m_c_to_leaf[fixed_sigma]; // map symbol c to a leaf in the tree structure
203  // if m_c_to_leaf[c] == undef the char does
204  // not exists in the text
205  uint64_t m_path[fixed_sigma]; // path information for each char; the bits at position
206  // 0..55 hold path information; bits 56..63 the length
207  // of the path in binary representation
208 
210 
211  _byte_tree(const std::vector<pc_node> & temp_nodes, uint64_t & bv_size, const t_wt *)
212  {
213  m_nodes.resize(temp_nodes.size());
214  m_nodes[0] = temp_nodes.back(); // insert root at index 0
215  bv_size = 0;
216  size_t node_cnt = 1;
217  node_type last_parent = undef;
218  std::deque<node_type> q;
219  q.push_back(0);
220  while (!q.empty())
221  {
222  node_type idx;
223  if (!t_dfs_shape)
224  {
225  idx = q.front();
226  q.pop_front();
227  }
228  else
229  {
230  idx = q.back();
231  q.pop_back();
232  }
233  // frq_sum is store in bv_pos value
234  uint64_t frq = m_nodes[idx].bv_pos;
235  m_nodes[idx].bv_pos = bv_size;
236  if (m_nodes[idx].child[0] != undef) // if node is not a leaf
237  bv_size += frq; // add frequency
238  if (idx > 0)
239  { // node is not the root
240  if (last_parent != m_nodes[idx].parent)
241  m_nodes[m_nodes[idx].parent].child[0] = idx;
242  else
243  m_nodes[m_nodes[idx].parent].child[1] = idx;
244  last_parent = m_nodes[idx].parent;
245  }
246  if (m_nodes[idx].child[0] != undef)
247  { // if node is not a leaf
248  for (uint32_t k = 0; k < 2; ++k)
249  { // add children to tree
250  m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
251  m_nodes[node_cnt].parent = idx;
252  q.push_back(node_cnt);
253  m_nodes[idx].child[k] = node_cnt++;
254  }
255  }
256  }
257  // initialize m_c_to_leaf
258  for (uint32_t i = 0; i < fixed_sigma; ++i)
259  m_c_to_leaf[i] = undef; // if c is not in the alphabet m_c_to_leaf[c] = undef
260  for (node_type v = 0; v < m_nodes.size(); ++v)
261  {
262  if (m_nodes[v].child[0] == undef) // if node is a leaf
263  m_c_to_leaf[(uint8_t)m_nodes[v].bv_pos_rank] = v; // calculate value
264  }
265  // initialize path information
266  // Note: In the case of a bfs search order,
267  // we can classify nodes as right child and left child with an easy criterion:
268  // node is a left child, if node%2==1
269  // node is a right child, if node%2==0
270  for (uint32_t c = 0, prev_c = 0; c < fixed_sigma; ++c)
271  {
272  if (m_c_to_leaf[c] != undef)
273  { // if char exists in the alphabet
274  node_type v = m_c_to_leaf[c];
275  uint64_t pw = 0; // path
276  uint64_t pl = 0; // path len
277  while (v != root())
278  { // while node is not the root
279  pw <<= 1;
280  if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
281  pw |= 1ULL;
282  ++pl;
283  v = m_nodes[v].parent; // go up the tree
284  }
285  if (pl > 56) { throw std::logic_error("Code depth greater than 56!!!"); }
286  m_path[c] = pw | (pl << 56);
287  prev_c = c;
288  }
289  else
290  {
291  uint64_t pl = 0; // len is 0, good for special case in rank
292  m_path[c] = prev_c | (pl << 56);
293  }
294  }
295  }
296 
297  template <typename t_rank_type>
298  void init_node_ranks(const t_rank_type & rank)
299  {
300  for (uint64_t i = 0; i < m_nodes.size(); ++i)
301  {
302  if (m_nodes[i].child[0] != undef) // if node is not a leaf
303  m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
304  }
305  }
306 
307  _byte_tree(const _byte_tree & bt)
308  : m_nodes(bt.m_nodes)
309  {
310 
311  for (uint32_t i = 0; i < fixed_sigma; ++i) m_c_to_leaf[i] = bt.m_c_to_leaf[i];
312  for (uint32_t i = 0; i < fixed_sigma; ++i) m_path[i] = bt.m_path[i];
313  }
314 
316  {
317  if (this != &bt)
318  {
319  _byte_tree tmp(bt);
320  *this = std::move(tmp);
321  }
322  return *this;
323  }
324 
326  {
327  if (this != &bt)
328  {
329  m_nodes = std::move(bt.m_nodes);
330  for (uint32_t i = 0; i < fixed_sigma; ++i) m_c_to_leaf[i] = bt.m_c_to_leaf[i];
331  for (uint32_t i = 0; i < fixed_sigma; ++i) m_path[i] = bt.m_path[i];
332  }
333  return *this;
334  }
335 
337  uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
338  {
339  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
340  uint64_t written_bytes = 0;
341  uint64_t m_nodes_size = m_nodes.size();
342  written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
343  written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
344  out.write((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
345  written_bytes += fixed_sigma * sizeof(m_c_to_leaf[0]); // bytes from previous loop
346  out.write((char *)m_path, fixed_sigma * sizeof(m_path[0]));
347  written_bytes += fixed_sigma * sizeof(m_path[0]); // bytes from previous loop
348  structure_tree::add_size(child, written_bytes);
349  return written_bytes;
350  }
351 
353  void load(std::istream & in)
354  {
355  uint64_t m_nodes_size = 0;
356  read_member(m_nodes_size, in);
357  m_nodes = std::vector<data_node>(m_nodes_size);
358  load_vector(m_nodes, in);
359  in.read((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
360  in.read((char *)m_path, fixed_sigma * sizeof(m_path[0]));
361  }
362 
363  template <typename archive_t>
364  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
365  {
366  ar(CEREAL_NVP(m_nodes));
367  ar(CEREAL_NVP(m_c_to_leaf));
368  ar(CEREAL_NVP(m_path));
369  }
370 
371  template <typename archive_t>
372  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
373  {
374  ar(CEREAL_NVP(m_nodes));
375  ar(CEREAL_NVP(m_c_to_leaf));
376  ar(CEREAL_NVP(m_path));
377  }
378 
380  bool operator==(_byte_tree const & other) const noexcept
381  {
382  return (m_nodes == other.m_nodes) /* && (m_c_to_leaf == other.m_c_to_leaf) &&
383  (m_path == other.m_path)*/;
384  }
385 
387  bool operator!=(_byte_tree const & other) const noexcept { return !(*this == other); }
388 
390  inline node_type c_to_leaf(value_type c) const { return m_c_to_leaf[c]; }
392  inline static node_type root() { return 0; }
393 
395  uint64_t size() const { return m_nodes.size(); }
396 
398  inline node_type parent(node_type v) const { return m_nodes[v].parent; }
400  inline node_type child(node_type v, uint8_t i) const { return m_nodes[v].child[i]; }
401 
403  inline bool is_leaf(node_type v) const { return m_nodes[v].child[0] == undef; }
404 
406  inline uint64_t size(node_type v) const
407  {
408  auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
409  return bv_pos(next_v) - bv_pos(v);
410  }
411 
413  inline uint64_t bit_path(value_type c) const { return m_path[c]; }
414 
416  inline uint64_t bv_pos(node_type v) const { return m_nodes[v].bv_pos; }
417 
419  inline uint64_t bv_pos_rank(node_type v) const { return m_nodes[v].bv_pos_rank; }
420 
422  inline bool is_valid(node_type v) const { return v != undef; }
423 
425  inline std::pair<bool, value_type> symbol_gte(value_type c) const
426  {
427  for (uint32_t i = c; i < fixed_sigma; i++)
428  {
429  if (m_c_to_leaf[i] != undef) { return { true, i }; }
430  }
431  return { false, 0 };
432  }
433 
435  inline std::pair<bool, value_type> symbol_lte(value_type c) const
436  {
437  for (uint32_t i = c; i > 0; i--)
438  {
439  if (m_c_to_leaf[i] != undef) { return { true, i }; }
440  }
441  if (m_c_to_leaf[0] != undef) return { true, 0 };
442  return { false, 0 };
443  }
444 };
445 
446 // Strategy class for tree representation of a WT
447 template <bool t_dfs_shape = false>
448 struct byte_tree
449 {
450  template <typename t_wt>
452 };
453 
454 // Strategy class for tree representation of a WT
455 template <bool t_dfs_shape, typename t_wt>
456 struct _int_tree
457 {
459  using value_type = uint64_t;
460  using node_type = uint64_t; // node is represented by index in m_nodes
462  enum : uint64_t
463  {
464  undef = 0xFFFFFFFFFFFFFFFFULL
465  }; // max uint64_t value
466  enum : uint8_t
467  {
468  int_width = 0
469  }; // width of the input integers is variable
470 
471  std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
472  std::vector<node_type> m_c_to_leaf; // map symbol c to a leaf in the tree structure
473  // if m_c_to_leaf[c] == undef the char does
474  // not exists in the text
475  std::vector<uint64_t> m_path; // path information for each char; the bits at position
476  // 0..55 hold path information; bits 56..63 the length
477  // of the path in binary representation
478 
479  _int_tree() = default;
480 
481  _int_tree(const std::vector<pc_node> & temp_nodes, uint64_t & bv_size, const t_wt *)
482  {
483  m_nodes.resize(temp_nodes.size());
484  m_nodes[0] = temp_nodes.back(); // insert root at index 0
485  bv_size = 0;
486  size_t node_cnt = 1;
487  node_type last_parent = undef;
488  std::deque<node_type> q;
489  q.push_back(0);
490  uint64_t max_c = 0;
491  while (!q.empty())
492  {
493  node_type idx;
494  if (!t_dfs_shape)
495  {
496  idx = q.front();
497  q.pop_front();
498  }
499  else
500  {
501  idx = q.back();
502  q.pop_back();
503  }
504  // frq_sum is store in bv_pos value
505  uint64_t frq = m_nodes[idx].bv_pos;
506  m_nodes[idx].bv_pos = bv_size;
507  if (m_nodes[idx].child[0] != undef)
508  { // if node is not a leaf
509  bv_size += frq; // add frequency
510  }
511  else if (max_c < m_nodes[idx].bv_pos_rank)
512  { // node is leaf and contains large symbol
513  max_c = m_nodes[idx].bv_pos_rank;
514  }
515  if (idx > 0)
516  { // node is not the root
517  if (last_parent != m_nodes[idx].parent)
518  m_nodes[m_nodes[idx].parent].child[0] = idx;
519  else
520  m_nodes[m_nodes[idx].parent].child[1] = idx;
521  last_parent = m_nodes[idx].parent;
522  }
523  if (m_nodes[idx].child[0] != undef)
524  { // if node is not a leaf
525  for (uint32_t k = 0; k < 2; ++k)
526  { // add children to tree
527  m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
528  m_nodes[node_cnt].parent = idx;
529  q.push_back(node_cnt);
530  m_nodes[idx].child[k] = node_cnt++;
531  }
532  }
533  }
534  // initialize m_c_to_leaf
535  // if c is not in the alphabet m_c_to_leaf[c] = undef
536  m_c_to_leaf.resize(max_c + 1, undef);
537  for (node_type v = 0; v < m_nodes.size(); ++v)
538  {
539  if (m_nodes[v].child[0] == undef)
540  { // if node is a leaf
541  uint64_t c = m_nodes[v].bv_pos_rank;
542  m_c_to_leaf[c] = v; // calculate value
543  if (c > max_c) max_c = c;
544  }
545  }
546  m_path = std::vector<uint64_t>(m_c_to_leaf.size(), 0);
547  // initialize path information
548  // Note: In the case of a bfs search order,
549  // we can classify nodes as right child and left child with an easy criterion:
550  // node is a left child, if node%2==1
551  // node is a right child, if node%2==0
552  for (value_type c = 0, prev_c = 0; c < m_c_to_leaf.size(); ++c)
553  {
554  if (m_c_to_leaf[c] != undef)
555  { // if char exists in the alphabet
556  node_type v = m_c_to_leaf[c];
557  uint64_t w = 0; // path
558  uint64_t l = 0; // path len
559  while (v != root())
560  { // while node is not the root
561  w <<= 1;
562  if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
563  w |= 1ULL;
564  ++l;
565  v = m_nodes[v].parent; // go up the tree
566  }
567  if (l > 56) { throw std::logic_error("Code depth greater than 56!!!"); }
568  m_path[c] = w | (l << 56);
569  prev_c = c;
570  }
571  else
572  {
573  uint64_t pl = 0; // len is 0, good for special case in rank
574  m_path[c] = prev_c | (pl << 56);
575  }
576  }
577  }
578 
579  template <typename t_rank_type>
580  void init_node_ranks(const t_rank_type & rank)
581  {
582  for (uint64_t i = 0; i < m_nodes.size(); ++i)
583  {
584  if (m_nodes[i].child[0] != undef) // if node is not a leaf
585  m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
586  }
587  }
588 
589  _int_tree(const _int_tree & bt) = default;
590  _int_tree(_int_tree && bt) = default;
591 
592  _int_tree & operator=(const _int_tree & bt) = default;
593  _int_tree & operator=(_int_tree && bt) = default;
594 
596  uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
597  {
598  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
599  uint64_t written_bytes = 0;
600  uint64_t m_nodes_size = m_nodes.size();
601  written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
602  written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
603  uint64_t m_c_to_leaf_size = m_c_to_leaf.size();
604  written_bytes += write_member(m_c_to_leaf_size, out, child, "m_c_to_leaf.size()");
605  written_bytes += serialize_vector(m_c_to_leaf, out, child, "m_c_to_leaf");
606  uint64_t m_path_size = m_path.size();
607  written_bytes += write_member(m_path_size, out, child, "m_path.size()");
608  written_bytes += serialize_vector(m_path, out, child, "m_path");
609  structure_tree::add_size(child, written_bytes);
610  return written_bytes;
611  }
612 
614  void load(std::istream & in)
615  {
616  uint64_t m_nodes_size = 0;
617  read_member(m_nodes_size, in);
618  m_nodes = std::vector<data_node>(m_nodes_size);
619  load_vector(m_nodes, in);
620  uint64_t m_c_to_leaf_size = 0;
621  read_member(m_c_to_leaf_size, in);
622  m_c_to_leaf = std::vector<node_type>(m_c_to_leaf_size);
624  uint64_t m_path_size = 0;
625  read_member(m_path_size, in);
626  m_path = std::vector<uint64_t>(m_path_size);
627  load_vector(m_path, in);
628  }
629 
631  bool operator==(_int_tree const & other) const noexcept
632  {
633  return (m_nodes == other.m_nodes) && (m_c_to_leaf == other.m_c_to_leaf) && (m_path == other.m_path);
634  }
635 
637  bool operator!=(_int_tree const & other) const noexcept { return !(*this == other); }
638 
639  template <typename archive_t>
640  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
641  {
642  ar(CEREAL_NVP(m_nodes));
643  ar(CEREAL_NVP(m_c_to_leaf));
644  ar(CEREAL_NVP(m_path));
645  }
646 
647  template <typename archive_t>
648  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
649  {
650  ar(CEREAL_NVP(m_nodes));
651  ar(CEREAL_NVP(m_c_to_leaf));
652  ar(CEREAL_NVP(m_path));
653  }
654 
657  {
658  if (c >= m_c_to_leaf.size())
659  return undef;
660  else
661  return m_c_to_leaf[c];
662  }
664  inline static node_type root() { return 0; }
665 
667  uint64_t size() const { return m_nodes.size(); }
668 
670  inline node_type parent(node_type v) const { return m_nodes[v].parent; }
672  inline node_type child(node_type v, uint8_t i) const { return m_nodes[v].child[i]; }
673 
675  inline bool is_leaf(node_type v) const { return m_nodes[v].child[0] == undef; }
676 
678  inline uint64_t size(node_type v) const
679  {
680  auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
681  return bv_pos(next_v) - bv_pos(v);
682  }
683 
685  inline uint64_t bit_path(value_type c) const
686  {
687  if (c >= m_path.size()) { return m_path.size() - 1; }
688  return m_path[c];
689  }
690 
692  inline uint64_t bv_pos(node_type v) const { return m_nodes[v].bv_pos; }
693 
695  inline uint64_t bv_pos_rank(node_type v) const { return m_nodes[v].bv_pos_rank; }
696 
698  inline bool is_valid(node_type v) const { return v != undef; }
699 
701  inline std::pair<bool, value_type> symbol_gte(value_type c) const
702  {
703  if (c >= m_c_to_leaf.size()) { return { false, 0 }; }
704  for (value_type i = c; i < m_c_to_leaf.size(); i++)
705  {
706  if (m_c_to_leaf[i] != undef) { return { true, i }; }
707  }
708  return { false, 0 };
709  }
710 
712  inline std::pair<bool, value_type> symbol_lte(value_type c) const
713  {
714  if (c >= m_c_to_leaf.size())
715  {
716  // return the largest symbol
717  c = m_c_to_leaf.size() - 1;
718  }
719  for (value_type i = c; i > 0; i--)
720  {
721  if (m_c_to_leaf[i] != undef) { return { true, i }; }
722  }
723  if (m_c_to_leaf[0] != undef) return { true, 0 };
724  return { false, 0 };
725  }
726 };
727 
728 // Strategy class for tree representation of a WT
729 template <bool t_dfs_shape = false>
730 struct int_tree
731 {
732  template <typename t_wt>
734 };
735 
736 template <typename t_bv>
738 {
739  public:
740  typedef typename t_bv::value_type value_type;
741  typedef typename t_bv::size_type size_type;
742  typedef typename t_bv::difference_type difference_type;
743  typedef typename t_bv::const_iterator iterator;
744 
745  private:
746  iterator m_begin, m_end;
747 
748  public:
750  : m_begin(b)
751  , m_end(e)
752  {}
753  value_type operator[](size_type i) const { return *(m_begin + i); }
754  size_type size() const { return m_end - m_begin; }
755  iterator begin() const { return m_begin; }
756  iterator end() const { return m_end; }
757 };
758 
759 template <typename t_bv>
761 {
762  public:
763  typedef typename t_bv::value_type value_type;
764  typedef typename t_bv::size_type size_type;
765  typedef typename t_bv::difference_type difference_type;
766  typedef typename t_bv::const_iterator iterator;
767 
768  private:
769  iterator m_begin, m_end;
770 
771  public:
773  : m_begin(b)
774  , m_end(e)
775  {}
776  value_type operator[](size_type i) const { return *(m_begin + i); }
777  size_type size() const { return m_end - m_begin; }
778  iterator begin() const { return m_begin; }
779  iterator end() const { return m_end; }
780 };
781 
782 inline bool empty(const range_type & r)
783 {
784  return std::get<0>(r) == (std::get<1>(r) + 1);
785 }
786 
788 {
789  return std::get<1>(r) - std::get<0>(r) + 1;
790 }
791 
792 inline pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent, uint64_t child_left, uint64_t child_right)
793  : freq(freq)
794  , sym(sym)
795  , parent(parent)
796 {
797  child[0] = child_left;
798  child[1] = child_right;
799 }
800 
801 } // end namespace sdsl
802 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A generic vector class for integers of width .
Definition: int_vector.hpp:253
size_type size() const
Definition: wt_helper.hpp:754
iterator begin() const
Definition: wt_helper.hpp:755
node_bv_container(iterator b, iterator e)
Definition: wt_helper.hpp:749
t_bv::size_type size_type
Definition: wt_helper.hpp:741
t_bv::const_iterator iterator
Definition: wt_helper.hpp:743
value_type operator[](size_type i) const
Definition: wt_helper.hpp:753
iterator end() const
Definition: wt_helper.hpp:756
t_bv::value_type value_type
Definition: wt_helper.hpp:740
t_bv::difference_type difference_type
Definition: wt_helper.hpp:742
iterator end() const
Definition: wt_helper.hpp:779
t_bv::size_type size_type
Definition: wt_helper.hpp:764
value_type operator[](size_type i) const
Definition: wt_helper.hpp:776
node_seq_container(iterator b, iterator e)
Definition: wt_helper.hpp:772
iterator begin() const
Definition: wt_helper.hpp:778
t_bv::difference_type difference_type
Definition: wt_helper.hpp:765
t_bv::value_type value_type
Definition: wt_helper.hpp:763
t_bv::const_iterator iterator
Definition: wt_helper.hpp:766
size_type size() const
Definition: wt_helper.hpp:777
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
void load_vector(std::vector< T > &, std::istream &)
Load all elements of a vector from a input stream.
Definition: io.hpp:404
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
Definition: wt_helper.hpp:52
uint64_t serialize_vector(const std::vector< T > &, std::ostream &, sdsl::structure_tree_node *v=nullptr, std::string="")
Serialize each element of an std::vector.
Definition: io.hpp:376
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition: wt_helper.hpp:40
bool empty(const range_type &r)
Empty range check.
Definition: wt_helper.hpp:782
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:364
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
Definition: wt_helper.hpp:416
uint8_t value_type
Definition: wt_helper.hpp:185
_byte_tree & operator=(const _byte_tree &bt)
Definition: wt_helper.hpp:315
void init_node_ranks(const t_rank_type &rank)
Definition: wt_helper.hpp:298
node_type m_c_to_leaf[fixed_sigma]
Definition: wt_helper.hpp:202
_byte_tree(const _byte_tree &bt)
Definition: wt_helper.hpp:307
_byte_tree(const std::vector< pc_node > &temp_nodes, uint64_t &bv_size, const t_wt *)
Definition: wt_helper.hpp:211
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
Definition: wt_helper.hpp:419
bool is_valid(node_type v) const
Return if the node is a valid node.
Definition: wt_helper.hpp:422
static node_type root()
Return the root node of the tree.
Definition: wt_helper.hpp:392
uint16_t node_type
Definition: wt_helper.hpp:186
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
Definition: wt_helper.hpp:413
_byte_tree & operator=(_byte_tree &&bt)
Definition: wt_helper.hpp:325
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
Definition: wt_helper.hpp:400
bool operator==(_byte_tree const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:380
node_type parent(node_type v) const
Return the parent node of v.
Definition: wt_helper.hpp:398
uint64_t size(node_type v) const
Return size of an inner node.
Definition: wt_helper.hpp:406
uint64_t size() const
Return the number of nodes in the tree.
Definition: wt_helper.hpp:395
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
Definition: wt_helper.hpp:425
uint64_t m_path[fixed_sigma]
Definition: wt_helper.hpp:205
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:372
bool operator!=(_byte_tree const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:387
bool is_leaf(node_type v) const
Return if v is a leaf node.
Definition: wt_helper.hpp:403
std::vector< data_node > m_nodes
Definition: wt_helper.hpp:201
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
Definition: wt_helper.hpp:435
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
Definition: wt_helper.hpp:390
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_helper.hpp:353
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_helper.hpp:337
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
Definition: wt_helper.hpp:701
_int_tree(const std::vector< pc_node > &temp_nodes, uint64_t &bv_size, const t_wt *)
Definition: wt_helper.hpp:481
_int_tree(_int_tree &&bt)=default
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
Definition: wt_helper.hpp:695
void init_node_ranks(const t_rank_type &rank)
Definition: wt_helper.hpp:580
std::vector< data_node > m_nodes
Definition: wt_helper.hpp:471
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:648
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_helper.hpp:614
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
Definition: wt_helper.hpp:685
static node_type root()
Return the root node of the tree.
Definition: wt_helper.hpp:664
_int_tree()=default
bool operator!=(_int_tree const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:637
uint64_t value_type
Definition: wt_helper.hpp:459
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
Definition: wt_helper.hpp:712
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
Definition: wt_helper.hpp:692
_int_tree & operator=(const _int_tree &bt)=default
_int_tree & operator=(_int_tree &&bt)=default
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
Definition: wt_helper.hpp:672
_int_tree(const _int_tree &bt)=default
uint64_t size(node_type v) const
Return size of an inner node.
Definition: wt_helper.hpp:678
node_type parent(node_type v) const
Return the parent node of v.
Definition: wt_helper.hpp:670
bool is_leaf(node_type v) const
Return if v is a leaf node.
Definition: wt_helper.hpp:675
std::vector< uint64_t > m_path
Definition: wt_helper.hpp:475
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
Definition: wt_helper.hpp:656
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:640
bool operator==(_int_tree const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:631
uint64_t node_type
Definition: wt_helper.hpp:460
uint64_t size() const
Return the number of nodes in the tree.
Definition: wt_helper.hpp:667
std::vector< node_type > m_c_to_leaf
Definition: wt_helper.hpp:472
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_helper.hpp:596
bool is_valid(node_type v) const
Return if the node is a valid node.
Definition: wt_helper.hpp:698
_node & operator=(const _node &v)
Definition: wt_helper.hpp:101
bool operator==(_node const &other) const noexcept
Equality operator.
Definition: wt_helper.hpp:166
_node & operator=(const pc_node &v)
Definition: wt_helper.hpp:114
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_helper.hpp:146
_node(const _node &)=default
uint64_t size_type
Definition: wt_helper.hpp:80
uint64_t bv_pos_rank
Definition: wt_helper.hpp:82
uint64_t bv_pos
Definition: wt_helper.hpp:81
node_type child[2]
Definition: wt_helper.hpp:84
bool operator!=(_node const &other) const noexcept
Inequality operator.
Definition: wt_helper.hpp:173
node_type parent
Definition: wt_helper.hpp:83
_node(uint64_t bv_pos=0, uint64_t bv_pos_rank=0, node_type parent=t_tree_strat_fat::undef, node_type child_left=t_tree_strat_fat::undef, node_type child_right=t_tree_strat_fat::undef)
Definition: wt_helper.hpp:86
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_helper.hpp:156
typename t_tree_strat_fat::node_type node_type
Definition: wt_helper.hpp:79
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Definition: wt_helper.hpp:124
void load(std::istream &in)
Definition: wt_helper.hpp:137
uint64_t freq
Definition: wt_helper.hpp:59
uint64_t parent
Definition: wt_helper.hpp:61
uint64_t child[2]
Definition: wt_helper.hpp:62
uint64_t sym
Definition: wt_helper.hpp:60
pc_node(uint64_t freq=0, uint64_t sym=0, uint64_t parent=undef, uint64_t child_left=undef, uint64_t child_right=undef)
Definition: wt_helper.hpp:792