SDSL  3.0.0
Succinct Data Structure Library
cst_sada.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_CST_SADA
9 #define INCLUDED_SDSL_CST_SADA
10 
11 #include <algorithm>
12 #include <cassert>
13 #include <cstring> // for strlen
14 #include <iomanip>
15 #include <iostream>
16 #include <iterator>
17 
18 #include <sdsl/bp_support.hpp>
19 #include <sdsl/bp_support_sada.hpp>
20 #include <sdsl/construct.hpp>
21 #include <sdsl/csa_sada.hpp> // for std initialization of cst_sada
22 #include <sdsl/cst_iterators.hpp>
23 #include <sdsl/cst_sct3.hpp>
24 #include <sdsl/int_vector.hpp>
25 #include <sdsl/iterators.hpp>
27 #include <sdsl/sdsl_concepts.hpp>
32 #include <sdsl/util.hpp>
33 
34 namespace sdsl
35 {
36 
38 
67 template <class t_csa = csa_sada<>,
68  class t_lcp = lcp_support_sada<>,
69  class t_bp_support = bp_support_sada<>,
70  class t_rank_10 = rank_support_v5<10, 2>,
71  class t_select_10 = select_support_mcl<10, 2>>
72 class cst_sada
73 {
74  static_assert(std::is_same<typename index_tag<t_csa>::type, csa_tag>::value,
75  "First template argument has to be a compressed suffix array.");
76 
77  public:
80  typedef typename t_csa::size_type size_type;
81  typedef ptrdiff_t difference_type;
82  typedef t_csa csa_type;
83  typedef typename t_lcp::template type<cst_sada> lcp_type;
84  typedef typename t_csa::char_type char_type;
85  typedef typename t_csa::string_type string_type;
86  typedef size_type node_type;
87  typedef t_bp_support bp_support_type;
88  typedef t_rank_10 rank_10_type;
89  typedef t_select_10 select_10_type;
90 
91  typedef typename t_csa::alphabet_type::comp_char_type comp_char_type;
92  typedef typename t_csa::alphabet_type::sigma_type sigma_type;
93 
94  typedef typename t_csa::alphabet_category alphabet_category;
96 
97  private:
98  t_csa m_csa; // suffix array
99  lcp_type m_lcp; // lcp information
100  bit_vector m_bp; // balanced parentheses sequence for suffix tree
101  bp_support_type m_bp_support; // support for the balanced parentheses sequence
102  rank_10_type m_bp_rank10; // rank_support for leaves, i.e. "10" bit pattern
103  select_10_type m_bp_select10; // select_support for leaves, i.e. "10" bit pattern
104 
105  /* Get the number of leaves that are in the subtree rooted at the first child of v +
106  * number of leafs in the subtrees rooted at the children of parent(v) which precede v in the tree.
107  */
108  size_type inorder(node_type v) const { return m_bp_rank10(m_bp_support.find_close(v + 1) + 1); }
109 
110  public:
111  const t_csa & csa = m_csa;
112  const lcp_type & lcp = m_lcp;
113  const bit_vector & bp = m_bp;
114  const bp_support_type & bp_support = m_bp_support;
115  const rank_10_type & bp_rank_10 = m_bp_rank10;
116  const select_10_type & bp_select_10 = m_bp_select10;
117 
119  cst_sada() = default;
120 
122  cst_sada(const cst_sada & cst)
123  : m_csa(cst.m_csa)
124  , m_bp(cst.m_bp)
125  , m_bp_support(cst.m_bp_support)
126  , m_bp_rank10(cst.m_bp_rank10)
127  , m_bp_select10(cst.m_bp_select10)
128  {
129  copy_lcp(m_lcp, cst.m_lcp, *this);
130  m_bp_support.set_vector(&m_bp);
131  m_bp_rank10.set_vector(&m_bp);
132  m_bp_select10.set_vector(&m_bp);
133  }
134 
137  : m_csa(std::move(cst.m_csa))
138  , m_bp(std::move(cst.m_bp))
139  , m_bp_support(std::move(cst.m_bp_support))
140  , m_bp_rank10(std::move(cst.m_bp_rank10))
141  , m_bp_select10(std::move(cst.m_bp_select10))
142  {
143  move_lcp(m_lcp, cst.m_lcp, *this);
144  m_bp_support.set_vector(&m_bp);
145  m_bp_rank10.set_vector(&m_bp);
146  m_bp_select10.set_vector(&m_bp);
147  }
148 
151  {
152  {
153  auto event = memory_monitor::event("bps-dfs");
155 
156  const bool o_par = true;
157  const bool c_par = !o_par;
158 
159  // trim bps to maximal size of tree
160  m_bp.resize(4 * lcp.size());
161 
162  if (lcp.size() > 0)
163  {
164  // run from back to front of lcp, enumerate intervals and count
165  // opening parentheses per position i
166  sorted_stack_support stack(lcp.size() + 1);
167  stack.push(0); // for lcp[n+1]
168  size_type p = m_bp.size() - 1;
169  for (size_type i = lcp.size() - 1; i > 0; --i)
170  {
171  // compute number of opening parentheses at position i
172  size_type co = 1; // for singleton interval
173  size_type x = lcp[i] + 1; // to indicate start and end of lcp-array
174  while (stack.top() > x)
175  {
176  stack.pop();
177  ++co;
178  }
179  if (stack.top() < x) { stack.push(x); }
180  // encode number of opening parenthesis at i as unary number
181  m_bp[p--] = o_par;
182  while (--co > 0) m_bp[p--] = c_par;
183  }
184  // handle last value lcp[0] separate, since it virtually is a -1, but in real is a 0
185  m_bp[p--] = o_par; // code last number of opening parenthesis
186  while (stack.size() > 1)
187  { // remove all elements except the zero from stack for next run
188  stack.pop();
189  m_bp[p--] = c_par; // move k to first bit before unary number
190  }
191 
192  // run from front to back of lcp, enumerate intervals,
193  // write opening parentheses and leave out closing parentheses
194  size_type q = 0;
195  for (size_type i = 1; i < lcp.size(); ++i)
196  {
197  // compute number of opening parentheses at position i-1 using
198  // the unary coding from the last step
199  size_type co = 0;
200  do {
201  ++co;
202  } while (m_bp[++p] == c_par);
203 
204  // compute number of closing parentheses at position i-1
205  size_type cc = 1; // for singleton interval
206  size_type x = lcp[i] + 1;
207  while (stack.top() > x)
208  {
209  stack.pop();
210  ++cc;
211  }
212  if (stack.top() < x) { stack.push(x); }
213  // write sequence for position i-1
214  while (co-- > 0) m_bp[q++] = o_par;
215  while (cc-- > 0) m_bp[q++] = c_par;
216  }
217  // handle last value lcp[n+1] separate
218  m_bp[q++] = o_par;
219  while (!stack.empty())
220  {
221  m_bp[q++] = c_par;
222  stack.pop();
223  }
224 
225  // trim bps to correct size and stop
226  m_bp.resize(q);
227  }
228  }
229  {
230  auto event = memory_monitor::event("bpss-dfs");
231 
232  util::init_support(m_bp_support, &m_bp);
233  util::init_support(m_bp_rank10, &m_bp);
234  util::init_support(m_bp_select10, &m_bp);
235  }
236  {
237  auto event = memory_monitor::event("clcp");
238  cache_config tmp_config(false, config.dir, config.id, config.file_map);
239  construct_lcp(m_lcp, *this, tmp_config);
240  config.file_map = tmp_config.file_map;
241  }
242  {
243  auto event = memory_monitor::event("load csa");
244  load_from_cache(m_csa, std::string(conf::KEY_CSA) + "_" + util::class_to_hash(m_csa), config);
245  }
246  }
247 
249 
252  size_type size() const { return m_csa.size(); }
253 
255 
258  static size_type max_size() { return t_csa::max_size(); }
259 
261 
264  bool empty() const { return m_csa.empty(); }
265 
267 
271  {
272  if (0 == m_bp.size()) // special case: tree is uninitialized
273  return end();
274  return const_iterator(this, root(), false, true);
275  }
276 
278  const_iterator begin(const node_type & v) const
279  {
280  if (0 == m_bp.size() and root() == v) return end();
281  return const_iterator(this, v, false, true);
282  }
283 
285 
288  const_iterator end() const { return const_iterator(this, root(), true, false); }
289 
291  const_iterator end(const node_type & v) const
292  {
293  if (root() == v) return end();
294  return ++const_iterator(this, v, true, true);
295  }
296 
299  {
300  if (0 == m_bp.size()) // special case: tree is uninitialized
301  return end_bottom_up();
303  }
304 
307 
309 
312  cst_sada & operator=(const cst_sada & cst)
313  {
314  if (this != &cst)
315  {
316  cst_sada tmp(cst);
317  *this = std::move(tmp);
318  }
319  return *this;
320  }
321 
323 
327  {
328  if (this != &cst)
329  {
330  m_csa = std::move(cst.m_csa);
331  move_lcp(m_lcp, cst.m_lcp, *this);
332  m_bp = std::move(cst.m_bp);
333  m_bp_support = std::move(cst.m_bp_support);
334  m_bp_support.set_vector(&m_bp);
335  m_bp_rank10 = std::move(cst.m_bp_rank10);
336  m_bp_rank10.set_vector(&m_bp);
337  m_bp_select10 = std::move(cst.m_bp_select10);
338  m_bp_select10.set_vector(&m_bp);
339  }
340  return *this;
341  }
342 
344  bool operator==(cst_sada const & other) const noexcept
345  {
346  return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp) &&
347  (m_bp_support == other.m_bp_support) && (m_bp_rank10 == other.m_bp_rank10) &&
348  (m_bp_select10 == other.m_bp_select10);
349  }
350 
352  bool operator!=(cst_sada const & other) const noexcept { return !(*this == other); }
353 
355 
358  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
359  {
360  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
361  size_type written_bytes = 0;
362  written_bytes += m_csa.serialize(out, child, "csa");
363  written_bytes += m_lcp.serialize(out, child, "lcp");
364  written_bytes += m_bp.serialize(out, child, "bp");
365  written_bytes += m_bp_support.serialize(out, child, "bp_support");
366  written_bytes += m_bp_rank10.serialize(out, child, "bp_rank_10");
367  written_bytes += m_bp_select10.serialize(out, child, "bp_select_10");
368  structure_tree::add_size(child, written_bytes);
369  return written_bytes;
370  }
371 
373 
375  void load(std::istream & in)
376  {
377  m_csa.load(in);
378  load_lcp(m_lcp, in, *this);
379  m_bp.load(in);
380  m_bp_support.load(in, &m_bp);
381  m_bp_rank10.load(in, &m_bp);
382  m_bp_select10.load(in, &m_bp);
383  }
384 
385  template <typename archive_t>
386  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
387  {
388  ar(CEREAL_NVP(m_csa));
389  ar(CEREAL_NVP(m_lcp));
390  ar(CEREAL_NVP(m_bp));
391  ar(CEREAL_NVP(m_bp_support));
392  ar(CEREAL_NVP(m_bp_rank10));
393  ar(CEREAL_NVP(m_bp_select10));
394  }
395 
396  template <typename archive_t>
397  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
398  {
399  ar(CEREAL_NVP(m_csa));
400  ar(CEREAL_NVP(m_lcp));
401  set_lcp_pointer(m_lcp, *this);
402  ar(CEREAL_NVP(m_bp));
403  ar(CEREAL_NVP(m_bp_support));
404  m_bp_support.set_vector(&m_bp);
405  ar(CEREAL_NVP(m_bp_rank10));
406  m_bp_rank10.set_vector(&m_bp);
407  ar(CEREAL_NVP(m_bp_select10));
408  m_bp_select10.set_vector(&m_bp);
409  }
410 
412  /* @{ */
413 
415 
419  node_type root() const { return 0; }
420 
422 
428  bool is_leaf(node_type v) const
429  {
430  assert(m_bp[v] == 1); // assert that v is a valid node of the suffix tree
431  // if there is a closing parenthesis at position v+1, the node is a leaf
432  return !m_bp[v + 1];
433  }
434 
436 
444  {
445  assert(i > 0 and i <= m_csa.size());
446  // -1 as select(i) returns the postion of the 0 of pattern 10
447  return m_bp_select10.select(i) - 1;
448  }
449 
451 
458  {
459  if (v == root()) // if v is the root
460  return 0;
461 
462  if (is_leaf(v))
463  { // if v is a leave
464  size_type i = m_bp_rank10(v); // get the index in the suffix array
465  return m_csa.size() - m_csa[i];
466  }
467  assert(inorder(v) > 0);
468  return m_lcp[inorder(v)];
469  }
470 
472 
479  {
480  // -2 as the root() we assign depth=0 to the root
481  return (m_bp_support.rank(v) << 1) - v - 2;
482  }
483 
485 
493  {
494  size_type r = m_bp_support.find_close(v);
495  return m_bp_rank10(r + 1) - m_bp_rank10(v);
496  }
497 
499 
504  node_type leftmost_leaf(const node_type v) const { return m_bp_select10(m_bp_rank10(v) + 1) - 1; }
505 
507 
513  {
514  size_type r = m_bp_support.find_close(v);
515  return m_bp_select10(m_bp_rank10(r + 1)) - 1;
516  }
517 
519 
526  size_type lb(const node_type v) const { return m_bp_rank10(v); }
527 
529 
536  size_type rb(const node_type v) const
537  {
538  size_type r = m_bp_support.find_close(v);
539  return m_bp_rank10(r + 1) - 1;
540  }
541 
543 
549  {
550  assert(m_bp[v] == 1); // assert a valid node
551  if (v == root())
552  return root();
553  else
554  {
555  return m_bp_support.enclose(v);
556  }
557  }
558 
560 
566 
568 
575  {
576  if (v == root()) return root();
577  node_type sib = m_bp_support.find_close(v) + 1;
578  if (m_bp[sib])
579  return sib;
580  else
581  return root();
582  }
583 
585  /*
586  * \param v A valid tree node of the cst.
587  * \param c First character of the edge label from v to the desired child.
588  * \param char_pos Reference which will hold the position (0-based) of the matching char c in the sorted text/suffix
589  * array. \return The child node w which edge label (v,w) starts with c or root() if it does not exist. \par Time
590  * complexity \f$ \Order( (\saaccess+\isaaccess) \cdot \sigma + \lcpaccess) \f$ \par Note With range median mimimum
591  * queries (RMMQ) one can code this operation in \f$\log \sigma \f$ time
592  */
593  node_type child(node_type v, const char_type c, size_type & char_pos) const
594  {
595  if (is_leaf(v)) // if v is a leaf = (), v has no child
596  return root();
597  // else v = ( ( ))
598  comp_char_type cc = m_csa.char2comp[c];
599  if (cc == 0 and c != 0) // TODO: aendere char2comp so ab, dass man diesen sonderfall nicht braucht
600  return root();
601  size_type char_ex_max_pos = m_csa.C[cc + 1], char_inc_min_pos = m_csa.C[cc];
602 
603  size_type d = depth(v); // time complexity: \lcpaccess
604  size_type res = v + 1;
605  while (true)
606  {
607  if (is_leaf(res)) { char_pos = get_char_pos(m_bp_rank10(res), d, m_csa); }
608  else
609  {
610  char_pos = get_char_pos(inorder(res), d, m_csa);
611  }
612  if (char_pos >= char_ex_max_pos) // if the current char is lex. greater than the searched char: exit
613  return root();
614  if (char_pos >= char_inc_min_pos) // if the current char is lex. equal with the
615  return res;
616  res = m_bp_support.find_close(res) + 1;
617  if (!m_bp[res]) // closing parenthesis: there exists no next child
618  return root();
619  }
620  }
621 
623  // \sa child(node_type v, const char_type c, size_type &char_pos)
624  node_type child(node_type v, const char_type c) const
625  {
626  size_type char_pos;
627  return child(v, c, char_pos);
628  }
629 
631 
640  {
641  if (is_leaf(v)) // if v is a leave, v has no child
642  return root();
643  size_type res = v + 1;
644  while (i > 1)
645  {
646  res = m_bp_support.find_close(res) + 1;
647  if (!m_bp[res])
648  { // closing parenthesis: there exists no next child
649  return root();
650  }
651  --i;
652  }
653  return res;
654  }
655 
657 
663  {
664  assert(1 <= d);
665  assert(d <= depth(v));
666  size_type i = 0; // index of the first suffix in the subtree of v
667  if (is_leaf(v))
668  { // if v is a leave
669  i = m_bp_rank10(v); // get the index in the suffix array
670  }
671  else
672  {
673  i = inorder(v);
674  }
675  size_type order = get_char_pos(i, d - 1, m_csa);
676  size_type c_begin = 1, c_end = ((size_type)m_csa.sigma) + 1, mid;
677  while (c_begin < c_end)
678  {
679  mid = (c_begin + c_end) >> 1;
680  if (m_csa.C[mid] <= order) { c_begin = mid + 1; }
681  else
682  {
683  c_end = mid;
684  }
685  }
686  return m_csa.comp2char[c_begin - 1];
687  }
688 
690 
698  {
699  assert(m_bp[v] == 1 and m_bp[w] == 1);
700  if (v > w) { std::swap(v, w); }
701  else if (v == w)
702  {
703  return v;
704  }
705  if (v == root()) return root();
706  return m_bp_support.double_enclose(v, w);
707  }
708 
710 
717  {
718  if (v == root()) return root();
719  // get leftmost leaf in the tree rooted at v
720  size_type left = m_bp_rank10(v);
721  if (is_leaf(v)) { return select_leaf(m_csa.psi[left] + 1); }
722  // get the rightmost leaf in the tree rooted at v
723  size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1;
724  assert(left < right);
725  node_type left_leaf = select_leaf(m_csa.psi[left] + 1);
726  node_type right_leaf = select_leaf(m_csa.psi[right] + 1);
727  return lca(left_leaf, right_leaf);
728  }
729 
731 
738  {
739  if (v == root()) return root();
740  // get leftmost leaf in the tree rooted at v
741  size_type left = m_bp_rank10(v);
742  if (is_leaf(v)) { return select_leaf(get_char_pos(left, i, m_csa) + 1); }
743  // get the rightmost leaf in the tree rooted at v
744  size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1;
745  assert(left < right);
746  node_type left_leaf = select_leaf(get_char_pos(left, i, m_csa) + 1);
747  node_type right_leaf = select_leaf(get_char_pos(right, i, m_csa) + 1);
748  return lca(left_leaf, right_leaf);
749  }
750 
752  /*
753  * \param v A valid node of a cst_sada.
754  * \param c The character which should be prepended to the string of the current node.
755  * \return root() if the Weiner link of (v, c) does not exist, otherwise the Weiner link is returned.
756  * \par Time complexity
757  * \f$ \Order{ t_{rank\_bwt} + t_{lca}}\f$
758  */
759  node_type wl(node_type v, const char_type c) const
760  {
761  // get leftmost leaf in the tree rooted at v
762  size_type left = m_bp_rank10(v);
763  // get the rightmost leaf in the tree rooted at v
764  size_type right = is_leaf(v) ? left : m_bp_rank10(m_bp_support.find_close(v)) - 1;
765 
766  size_type c_left = m_csa.bwt.rank(left, c);
767  size_type c_right = m_csa.bwt.rank(right + 1, c);
768 
769  if (c_left == c_right) // there exists no Weiner link
770  return root();
771  if (c_left + 1 == c_right)
772  return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
773  else
774  {
775  size_type left = m_csa.C[m_csa.char2comp[c]] + c_left;
776  size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1;
777  assert(left < right);
778  node_type left_leaf = select_leaf(left + 1);
779  node_type right_leaf = select_leaf(right + 1);
780  return lca(left_leaf, right_leaf);
781  }
782  }
783 
785 
791  {
792  assert(is_leaf(v));
793  // count the leaves left to leaf v
794  return m_csa[m_bp_rank10(v)];
795  }
796 
798 
806  {
807  // v+1 is < m_bp.size(), as v is the position of an open parenthesis
808  if (m_bp[v + 1])
809  { // case (a) inner node
810  return size() + (m_bp_support.rank(v) - 1) - m_bp_rank10(v);
811  }
812  else
813  { // case (b) leaf
814  return m_bp_rank10(v);
815  }
816  }
817 
819 
827  {
828  if (id < size())
829  { // the corresponding node is a leaf
830  return select_leaf(id + 1);
831  }
832  else
833  { // the corresponding node is a inner node
834  id = id + 1 - size();
835  // solved by binary search; TODO: can be done in constant time by using a select structure on the bitpattern
836  // 11
837  size_type lb = 0, rb = m_bp.size(); // lb inclusive, rb exclusive
838  // invariant: arg(lb) < id, arg(rb)>= id
839  while (rb - lb > 1)
840  {
841  size_type mid = lb + (rb - lb) / 2; // mid \in [0..m_bp.size()-1]
842  if (m_bp[mid] == 0 and m_bp[mid - 1] == 1)
843  { // if we are ``half on a leaf''
844  ++mid; // we step one to the right to include it
845  }
846  // get the number of open inner nodes before position mid, i.e. arg(mid)
847  size_type mid_id = m_bp_support.rank(mid - 1) -
848  m_bp_rank10(mid); // Note: mid-1 is valid of mid is of type ``size_type'' as us the
849  // parameter of rank
850  if (mid_id < id) { lb = mid; }
851  else
852  { // mid_id >= x
853  rb = mid;
854  }
855  }
856  return lb;
857  }
858  }
859 
861  /*
862  * \return The number of nodes of the suffix tree.
863  * \par Time complexity
864  * \f$ \Order{1} \f$
865  */
866  size_type nodes() const { return m_bp.size() >> 1; }
867 
869  /* \param lb Left bound of the lcp-interval [lb..rb] (inclusive).
870  * \param rb Right bound of the lcp-interval [lb..rb] (inclusive).
871  *\ return The node in the suffix tree corresponding lcp-interval [lb..rb]
872  */
874 
876 
883  {
884  size_type res = 0;
885  v = v + 1;
886  while (m_bp[v])
887  { // found open parentheses
888  ++res;
889  v = m_bp_support.find_close(v) + 1;
890  }
891  return res;
892  }
893 
895 
900  {
901  size_type ii = 0;
902  if (i > 0)
903  {
904  size_type ipos = m_bp_select10(i) - 1; // -1 as select returns the position of the zero
905  size_type ip1pos = m_bp_select10(i + 1) - 1; // " " " " " " " " "
906  ii = m_bp_support.double_enclose(ipos, ip1pos);
907  }
908  ii = m_bp_support.find_close(ii);
909  // all right, as bp[ii] = 0
910  return ii - m_bp_support.rank(ii) - m_bp_rank10(ii);
911  }
912  /* @} */
913 };
914 
915 } // end namespace sdsl
916 #endif
bp_support.hpp contains several classed which support find_open, find_close, enclose and rr-enclose q...
bp_support_sada.hpp contains an implementation of a balanced parentheses support structure proposed b...
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A forward iterator for a bottom up traversal of a suffix tree.
An forward iterator for (compressed) suffix trees.
A class for the Compressed Suffix Tree (CST) proposed by Sadakane.
Definition: cst_sada.hpp:73
cst_sada & operator=(const cst_sada &cst)
Assignment Operator.
Definition: cst_sada.hpp:312
const lcp_type & lcp
Definition: cst_sada.hpp:112
t_rank_10 rank_10_type
Definition: cst_sada.hpp:88
size_type tlcp_idx(size_type i) const
Maps an index i to the position in TLCP where LCP[i] can be found.
Definition: cst_sada.hpp:899
size_type sn(node_type v) const
Compute the suffix number of a leaf node v.
Definition: cst_sada.hpp:790
const bp_support_type & bp_support
Definition: cst_sada.hpp:114
size_type id(node_type v) const
Computes a unique identification number for a node of the suffix tree in the range [0....
Definition: cst_sada.hpp:805
node_type child(node_type v, const char_type c) const
Get the child w of node v which edge label (v,w) starts with character c.
Definition: cst_sada.hpp:624
node_type select_leaf(size_type i) const
Return the i-th leaf (1-based from left to right) of the suffix tree.
Definition: cst_sada.hpp:443
ptrdiff_t difference_type
Definition: cst_sada.hpp:81
node_type sibling(node_type v) const
Returns the next sibling of node v.
Definition: cst_sada.hpp:574
cst_sada(cst_sada &&cst)
Move constructor.
Definition: cst_sada.hpp:136
cst_sada(cache_config &config)
Construct CST from file_map.
Definition: cst_sada.hpp:150
t_select_10 select_10_type
Definition: cst_sada.hpp:89
node_type rightmost_leaf(const node_type v) const
Calculates the rightmost leaf in the subtree rooted at node v.
Definition: cst_sada.hpp:512
cst_sada()=default
Default constructor.
const t_csa & csa
Definition: cst_sada.hpp:111
size_type node_type
Type for the nodes in the tree.
Definition: cst_sada.hpp:86
cst_bottom_up_const_forward_iterator< cst_sada > const_bottom_up_iterator
Definition: cst_sada.hpp:79
bool operator!=(cst_sada const &other) const noexcept
Inequality operator.
Definition: cst_sada.hpp:352
const select_10_type & bp_select_10
Definition: cst_sada.hpp:116
t_csa::char_type char_type
Definition: cst_sada.hpp:84
node_type sl(node_type v) const
Compute the suffix link of node v.
Definition: cst_sada.hpp:716
t_csa::alphabet_category alphabet_category
Definition: cst_sada.hpp:94
size_type lb(const node_type v) const
Calculates the index of the leftmost leaf in the corresponding suffix array.
Definition: cst_sada.hpp:526
const_bottom_up_iterator end_bottom_up() const
Returns an iterator to the element after the last element of a bottom-up traversal of the tree.
Definition: cst_sada.hpp:306
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: cst_sada.hpp:270
node_type leftmost_leaf(const node_type v) const
Calculates the leftmost leaf in the subtree rooted at node v.
Definition: cst_sada.hpp:504
size_type size() const
Number of leaves in the suffix tree.
Definition: cst_sada.hpp:252
t_csa::alphabet_type::comp_char_type comp_char_type
Definition: cst_sada.hpp:91
size_type degree(node_type v) const
Get the number of children of a node v.
Definition: cst_sada.hpp:882
bool operator==(cst_sada const &other) const noexcept
Equality operator.
Definition: cst_sada.hpp:344
bool is_leaf(node_type v) const
Decide if a node is a leaf in the suffix tree.
Definition: cst_sada.hpp:428
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: cst_sada.hpp:358
const bit_vector & bp
Definition: cst_sada.hpp:113
size_type size(node_type v) const
Calculate the number of leaves in the subtree rooted at node v.
Definition: cst_sada.hpp:492
t_csa csa_type
Definition: cst_sada.hpp:82
cst_node_child_proxy< cst_sada > children(node_type v) const
Return a proxy object which allows iterating over the children of a node.
Definition: cst_sada.hpp:565
node_type wl(node_type v, const char_type c) const
Compute the Weiner link of node v and character c.
Definition: cst_sada.hpp:759
size_type rb(const node_type v) const
Calculates the index of the rightmost leaf in the corresponding suffix array.
Definition: cst_sada.hpp:536
const_iterator begin(const node_type &v) const
Returns a const_iterator to the first element of a depth first traversal of the subtree rooted at nod...
Definition: cst_sada.hpp:278
t_lcp::template type< cst_sada > lcp_type
Definition: cst_sada.hpp:83
static size_type max_size()
Returns the maximal lenght of text for that a suffix tree can be build.
Definition: cst_sada.hpp:258
void load(std::istream &in)
Load from a stream.
Definition: cst_sada.hpp:375
node_type node(size_type lb, size_type rb) const
Get the node in the suffix tree which corresponds to the lcp-interval [lb..rb].
Definition: cst_sada.hpp:873
size_type node_depth(node_type v) const
Returns the node depth of node v.
Definition: cst_sada.hpp:478
const_iterator end(const node_type &v) const
Returns a const_iterator to the element past the end of a depth first traversal of the subtree rooted...
Definition: cst_sada.hpp:291
bool empty() const
Returns if the data strucutre is empty.
Definition: cst_sada.hpp:264
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: cst_sada.hpp:397
t_bp_support bp_support_type
Definition: cst_sada.hpp:87
size_type nodes() const
Get the number of nodes of the suffix tree.
Definition: cst_sada.hpp:866
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: cst_sada.hpp:386
cst_tag index_category
Definition: cst_sada.hpp:95
cst_sada(const cst_sada &cst)
Copy constructor.
Definition: cst_sada.hpp:122
const rank_10_type & bp_rank_10
Definition: cst_sada.hpp:115
cst_sada & operator=(cst_sada &&cst)
Move assignment Operator.
Definition: cst_sada.hpp:326
char_type edge(node_type v, size_type d) const
Returns the d-th character (1-based indexing) of the edge-label pointing to v.
Definition: cst_sada.hpp:662
node_type lca(node_type v, node_type w) const
Calculate the lowest common ancestor (lca) of two nodes v and w of the suffix tree.
Definition: cst_sada.hpp:697
node_type select_child(node_type v, size_type i) const
Get the i-th child of a node v.
Definition: cst_sada.hpp:639
node_type child(node_type v, const char_type c, size_type &char_pos) const
Get the child w of node v which edge label (v,w) starts with character c.
Definition: cst_sada.hpp:593
const_bottom_up_iterator begin_bottom_up() const
Returns an iterator to the first element of a bottom-up traversal of the tree.
Definition: cst_sada.hpp:298
node_type sl(node_type v, size_type i) const
Compute the suffix link of node v applied a number of times consecutively.
Definition: cst_sada.hpp:737
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: cst_sada.hpp:288
t_csa::string_type string_type
Definition: cst_sada.hpp:85
node_type root() const
Return the root of the suffix tree.
Definition: cst_sada.hpp:419
t_csa::size_type size_type
Definition: cst_sada.hpp:80
cst_dfs_const_forward_iterator< cst_sada > const_iterator
Definition: cst_sada.hpp:75
size_type inv_id(size_type id)
Computes the node for such that id(v)=id.
Definition: cst_sada.hpp:826
size_type depth(node_type v) const
Returns the depth of node v.
Definition: cst_sada.hpp:457
node_type parent(node_type v) const
Calculate the parent node of a node v.
Definition: cst_sada.hpp:548
t_csa::alphabet_type::sigma_type sigma_type
Definition: cst_sada.hpp:92
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
static mm_event_proxy event(const std::string &name)
A stack which contains strictly increasing numbers in the range from to .
size_type top() const
Returns the topmost index on the stack.
void pop()
Pop the topmost index of the stack.
bool empty() const
Returns if the stack is empty.
void push(size_type x)
Push the index x of vector vec onto the stack.
size_type size() const
Returns the number of element is the stack.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
csa_sada.hpp contains an implementation of the compressed suffix array.
cst_iterators.hpp contains iterator classes for traversing (compressed) suffix arrays.
cst_sct3.hpp contains an implementation of the interval based CST.
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
lcp_support_sada.hpp contains a compressed lcp array.
constexpr char KEY_CSA[]
Definition: config.hpp:38
constexpr char KEY_LCP[]
Definition: config.hpp:44
int_vector ::size_type size_type
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa &csa)
bool load_from_cache(T &v, const std::string &key, const cache_config &config, bool add_type_hash=false)
Definition: io.hpp:711
void move_lcp(t_lcp &&lcp, t_lcp &&lcp_c, const t_cst &cst)
Definition: lcp.hpp:92
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:970
void copy_lcp(t_lcp &lcp, const t_lcp &lcp_c, const t_cst &cst)
Definition: lcp.hpp:57
void construct_lcp(t_lcp &lcp, const t_cst &cst, cache_config &config)
Definition: lcp.hpp:25
void set_lcp_pointer(t_lcp &lcp, const t_cst &cst)
Definition: lcp.hpp:159
void load_lcp(t_lcp &lcp, std::istream &in, const t_cst &cst)
Definition: lcp.hpp:127
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
Helper class for construction process.
Definition: config.hpp:67
std::string id
Definition: config.hpp:72
std::string dir
Definition: config.hpp:71
suffix_tree_algorithm.hpp contains algorithms on CSTs
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.