8 #ifndef INCLUDED_SDSL_CST_SADA
9 #define INCLUDED_SDSL_CST_SADA
67 template <
class t_csa = csa_sada<>,
68 class t_lcp = lcp_support_sada<>,
69 class t_bp_support = bp_support_sada<>,
70 class t_rank_10 = rank_support_v5<10, 2>,
71 class t_select_10 = select_support_mcl<10, 2>>
75 "First template argument has to be a compressed suffix array.");
83 typedef typename t_lcp::template type<cst_sada>
lcp_type;
92 typedef typename t_csa::alphabet_type::sigma_type
sigma_type;
108 size_type inorder(
node_type v)
const {
return m_bp_rank10(m_bp_support.find_close(v + 1) + 1); }
111 const t_csa &
csa = m_csa;
125 , m_bp_support(cst.m_bp_support)
126 , m_bp_rank10(cst.m_bp_rank10)
127 , m_bp_select10(cst.m_bp_select10)
130 m_bp_support.set_vector(&m_bp);
131 m_bp_rank10.set_vector(&m_bp);
132 m_bp_select10.set_vector(&m_bp);
137 : m_csa(std::move(cst.m_csa))
138 , m_bp(std::move(cst.m_bp))
139 , m_bp_support(std::move(cst.m_bp_support))
140 , m_bp_rank10(std::move(cst.m_bp_rank10))
141 , m_bp_select10(std::move(cst.m_bp_select10))
144 m_bp_support.set_vector(&m_bp);
145 m_bp_rank10.set_vector(&m_bp);
146 m_bp_select10.set_vector(&m_bp);
156 const bool o_par =
true;
157 const bool c_par = !o_par;
174 while (stack.
top() > x)
179 if (stack.
top() < x) { stack.
push(x); }
182 while (--co > 0) m_bp[p--] = c_par;
186 while (stack.
size() > 1)
202 }
while (m_bp[++p] == c_par);
207 while (stack.
top() > x)
212 if (stack.
top() < x) { stack.
push(x); }
214 while (co-- > 0) m_bp[q++] = o_par;
215 while (cc-- > 0) m_bp[q++] = c_par;
219 while (!stack.
empty())
232 util::init_support(m_bp_support, &m_bp);
233 util::init_support(m_bp_rank10, &m_bp);
234 util::init_support(m_bp_select10, &m_bp);
264 bool empty()
const {
return m_csa.empty(); }
272 if (0 == m_bp.
size())
280 if (0 == m_bp.
size() and
root() == v)
return end();
300 if (0 == m_bp.
size())
317 *
this = std::move(tmp);
330 m_csa = std::move(cst.m_csa);
332 m_bp = std::move(cst.m_bp);
333 m_bp_support = std::move(cst.m_bp_support);
334 m_bp_support.set_vector(&m_bp);
335 m_bp_rank10 = std::move(cst.m_bp_rank10);
336 m_bp_rank10.set_vector(&m_bp);
337 m_bp_select10 = std::move(cst.m_bp_select10);
338 m_bp_select10.set_vector(&m_bp);
346 return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp) &&
347 (m_bp_support == other.m_bp_support) && (m_bp_rank10 == other.m_bp_rank10) &&
348 (m_bp_select10 == other.m_bp_select10);
362 written_bytes += m_csa.serialize(out,
child,
"csa");
363 written_bytes += m_lcp.serialize(out,
child,
"lcp");
365 written_bytes += m_bp_support.serialize(out,
child,
"bp_support");
366 written_bytes += m_bp_rank10.serialize(out,
child,
"bp_rank_10");
367 written_bytes += m_bp_select10.serialize(out,
child,
"bp_select_10");
369 return written_bytes;
380 m_bp_support.load(in, &m_bp);
381 m_bp_rank10.load(in, &m_bp);
382 m_bp_select10.load(in, &m_bp);
385 template <
typename archive_t>
396 template <
typename archive_t>
404 m_bp_support.set_vector(&m_bp);
406 m_bp_rank10.set_vector(&m_bp);
408 m_bp_select10.set_vector(&m_bp);
430 assert(m_bp[v] == 1);
445 assert(i > 0 and i <= m_csa.size());
447 return m_bp_select10.select(i) - 1;
465 return m_csa.size() - m_csa[i];
467 assert(inorder(v) > 0);
468 return m_lcp[inorder(v)];
481 return (m_bp_support.rank(v) << 1) - v - 2;
494 size_type r = m_bp_support.find_close(v);
495 return m_bp_rank10(r + 1) - m_bp_rank10(v);
514 size_type r = m_bp_support.find_close(v);
515 return m_bp_select10(m_bp_rank10(r + 1)) - 1;
538 size_type r = m_bp_support.find_close(v);
539 return m_bp_rank10(r + 1) - 1;
550 assert(m_bp[v] == 1);
555 return m_bp_support.enclose(v);
577 node_type sib = m_bp_support.find_close(v) + 1;
599 if (cc == 0 and c != 0)
601 size_type char_ex_max_pos = m_csa.C[cc + 1], char_inc_min_pos = m_csa.C[cc];
612 if (char_pos >= char_ex_max_pos)
614 if (char_pos >= char_inc_min_pos)
616 res = m_bp_support.find_close(res) + 1;
627 return child(v, c, char_pos);
646 res = m_bp_support.find_close(res) + 1;
665 assert(d <=
depth(v));
677 while (c_begin < c_end)
679 mid = (c_begin + c_end) >> 1;
680 if (m_csa.C[mid] <= order) { c_begin = mid + 1; }
686 return m_csa.comp2char[c_begin - 1];
699 assert(m_bp[v] == 1 and m_bp[w] == 1);
706 return m_bp_support.double_enclose(v, w);
723 size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1;
724 assert(left < right);
727 return lca(left_leaf, right_leaf);
744 size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1;
745 assert(left < right);
748 return lca(left_leaf, right_leaf);
764 size_type right =
is_leaf(v) ? left : m_bp_rank10(m_bp_support.find_close(v)) - 1;
766 size_type c_left = m_csa.bwt.rank(left, c);
767 size_type c_right = m_csa.bwt.rank(right + 1, c);
769 if (c_left == c_right)
771 if (c_left + 1 == c_right)
772 return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
775 size_type left = m_csa.C[m_csa.char2comp[c]] + c_left;
776 size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1;
777 assert(left < right);
780 return lca(left_leaf, right_leaf);
794 return m_csa[m_bp_rank10(v)];
810 return size() + (m_bp_support.rank(v) - 1) - m_bp_rank10(v);
814 return m_bp_rank10(v);
834 id =
id + 1 -
size();
842 if (m_bp[mid] == 0 and m_bp[mid - 1] == 1)
847 size_type mid_id = m_bp_support.rank(mid - 1) -
850 if (mid_id <
id) {
lb = mid; }
889 v = m_bp_support.find_close(v) + 1;
905 size_type ip1pos = m_bp_select10(i + 1) - 1;
906 ii = m_bp_support.double_enclose(ipos, ip1pos);
908 ii = m_bp_support.find_close(ii);
910 return ii - m_bp_support.rank(ii) - m_bp_rank10(ii);
bp_support.hpp contains several classed which support find_open, find_close, enclose and rr-enclose q...
bp_support_sada.hpp contains an implementation of a balanced parentheses support structure proposed b...
A forward iterator for a bottom up traversal of a suffix tree.
An forward iterator for (compressed) suffix trees.
A class for the Compressed Suffix Tree (CST) proposed by Sadakane.
cst_sada & operator=(const cst_sada &cst)
Assignment Operator.
size_type tlcp_idx(size_type i) const
Maps an index i to the position in TLCP where LCP[i] can be found.
size_type sn(node_type v) const
Compute the suffix number of a leaf node v.
const bp_support_type & bp_support
size_type id(node_type v) const
Computes a unique identification number for a node of the suffix tree in the range [0....
node_type child(node_type v, const char_type c) const
Get the child w of node v which edge label (v,w) starts with character c.
node_type select_leaf(size_type i) const
Return the i-th leaf (1-based from left to right) of the suffix tree.
ptrdiff_t difference_type
node_type sibling(node_type v) const
Returns the next sibling of node v.
cst_sada(cst_sada &&cst)
Move constructor.
cst_sada(cache_config &config)
Construct CST from file_map.
t_select_10 select_10_type
node_type rightmost_leaf(const node_type v) const
Calculates the rightmost leaf in the subtree rooted at node v.
cst_sada()=default
Default constructor.
size_type node_type
Type for the nodes in the tree.
cst_bottom_up_const_forward_iterator< cst_sada > const_bottom_up_iterator
bool operator!=(cst_sada const &other) const noexcept
Inequality operator.
const select_10_type & bp_select_10
t_csa::char_type char_type
node_type sl(node_type v) const
Compute the suffix link of node v.
t_csa::alphabet_category alphabet_category
size_type lb(const node_type v) const
Calculates the index of the leftmost leaf in the corresponding suffix array.
const_bottom_up_iterator end_bottom_up() const
Returns an iterator to the element after the last element of a bottom-up traversal of the tree.
const_iterator begin() const
Returns a const_iterator to the first element.
node_type leftmost_leaf(const node_type v) const
Calculates the leftmost leaf in the subtree rooted at node v.
size_type size() const
Number of leaves in the suffix tree.
t_csa::alphabet_type::comp_char_type comp_char_type
size_type degree(node_type v) const
Get the number of children of a node v.
bool operator==(cst_sada const &other) const noexcept
Equality operator.
bool is_leaf(node_type v) const
Decide if a node is a leaf in the suffix tree.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
size_type size(node_type v) const
Calculate the number of leaves in the subtree rooted at node v.
cst_node_child_proxy< cst_sada > children(node_type v) const
Return a proxy object which allows iterating over the children of a node.
node_type wl(node_type v, const char_type c) const
Compute the Weiner link of node v and character c.
size_type rb(const node_type v) const
Calculates the index of the rightmost leaf in the corresponding suffix array.
const_iterator begin(const node_type &v) const
Returns a const_iterator to the first element of a depth first traversal of the subtree rooted at nod...
t_lcp::template type< cst_sada > lcp_type
static size_type max_size()
Returns the maximal lenght of text for that a suffix tree can be build.
void load(std::istream &in)
Load from a stream.
node_type node(size_type lb, size_type rb) const
Get the node in the suffix tree which corresponds to the lcp-interval [lb..rb].
size_type node_depth(node_type v) const
Returns the node depth of node v.
const_iterator end(const node_type &v) const
Returns a const_iterator to the element past the end of a depth first traversal of the subtree rooted...
bool empty() const
Returns if the data strucutre is empty.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
t_bp_support bp_support_type
size_type nodes() const
Get the number of nodes of the suffix tree.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
cst_sada(const cst_sada &cst)
Copy constructor.
const rank_10_type & bp_rank_10
cst_sada & operator=(cst_sada &&cst)
Move assignment Operator.
char_type edge(node_type v, size_type d) const
Returns the d-th character (1-based indexing) of the edge-label pointing to v.
node_type lca(node_type v, node_type w) const
Calculate the lowest common ancestor (lca) of two nodes v and w of the suffix tree.
node_type select_child(node_type v, size_type i) const
Get the i-th child of a node v.
node_type child(node_type v, const char_type c, size_type &char_pos) const
Get the child w of node v which edge label (v,w) starts with character c.
const_bottom_up_iterator begin_bottom_up() const
Returns an iterator to the first element of a bottom-up traversal of the tree.
node_type sl(node_type v, size_type i) const
Compute the suffix link of node v applied a number of times consecutively.
const_iterator end() const
Returns a const_iterator to the element after the last element.
t_csa::string_type string_type
node_type root() const
Return the root of the suffix tree.
t_csa::size_type size_type
cst_dfs_const_forward_iterator< cst_sada > const_iterator
size_type inv_id(size_type id)
Computes the node for such that id(v)=id.
size_type depth(node_type v) const
Returns the depth of node v.
node_type parent(node_type v) const
Calculate the parent node of a node v.
t_csa::alphabet_type::sigma_type sigma_type
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
static mm_event_proxy event(const std::string &name)
A stack which contains strictly increasing numbers in the range from to .
size_type top() const
Returns the topmost index on the stack.
void pop()
Pop the topmost index of the stack.
bool empty() const
Returns if the stack is empty.
void push(size_type x)
Push the index x of vector vec onto the stack.
size_type size() const
Returns the number of element is the stack.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
csa_sada.hpp contains an implementation of the compressed suffix array.
cst_iterators.hpp contains iterator classes for traversing (compressed) suffix arrays.
cst_sct3.hpp contains an implementation of the interval based CST.
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
lcp_support_sada.hpp contains a compressed lcp array.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa &csa)
bool load_from_cache(T &v, const std::string &key, const cache_config &config, bool add_type_hash=false)
void move_lcp(t_lcp &&lcp, t_lcp &&lcp_c, const t_cst &cst)
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
void copy_lcp(t_lcp &lcp, const t_lcp &lcp_c, const t_cst &cst)
void construct_lcp(t_lcp &lcp, const t_cst &cst, cache_config &config)
void set_lcp_pointer(t_lcp &lcp, const t_cst &cst)
void load_lcp(t_lcp &lcp, std::istream &in, const t_cst &cst)
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
Helper class for construction process.
suffix_tree_algorithm.hpp contains algorithms on CSTs
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.