8 #ifndef INCLUDED_SDSL_SUFFIX_ARRAY_ALGORITHM
9 #define INCLUDED_SDSL_SUFFIX_ARRAY_ALGORITHM
35 template <
class t_csa,
class t_pat_iter>
45 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
49 assert(r < csa.size());
55 auto l_res_upper = r + 1;
56 auto r_res_upper = r + 1;
63 for (
auto current = begin; current != end; current++)
65 auto index = csa.char2comp[*current];
66 if (index == 0)
return -1;
67 if (csa.C[index + 1] - 1 < i)
return -1;
68 if (csa.C[index] > i)
return 1;
75 while (l_res < l_res_upper)
78 int result = compare(sample);
81 else if (result == -1)
88 while (r_res + 1 < r_res_upper)
91 int result = compare(sample);
94 else if (result == -1)
100 return r_res - l_res + 1;
117 template <
class t_csa>
122 typename t_csa::char_type c,
126 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
155 template <
class t_csa>
160 typename t_csa::char_type c,
164 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
168 assert(r < csa.size());
170 if (cc == 0 and c > 0)
178 if (l == 0 and r + 1 == csa.size())
181 r_res = csa.C[cc + 1] - 1;
185 l_res = c_begin + csa.bwt.rank(l, c);
186 r_res = c_begin + csa.bwt.rank(r + 1, c) - 1;
189 assert(r_res + 1 - l_res >= 0);
190 return r_res + 1 - l_res;
217 template <
class t_csa,
class t_pat_iter>
227 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
231 while (begin < it and r + 1 - l > 0)
257 template <
class t_wt,
260 class t_sa_sample_strat,
262 class t_alphabet_strat>
276 assert(l_fwd <= r_fwd);
277 assert(r_fwd < csa_fwd.
size());
280 auto r_s_b = csa_fwd.
wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
282 size_type s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
283 size_type rank_r = r_fwd - l_fwd - s - b + rank_l;
284 l_fwd_res = c_begin + rank_l;
285 r_fwd_res = c_begin + rank_r;
286 assert(r_fwd_res + 1 >= l_fwd_res);
287 l_bwd_res = l_bwd + s;
288 r_bwd_res = r_bwd - b;
289 assert(r_bwd_res - l_bwd_res == r_fwd_res - l_fwd_res);
290 return r_fwd_res + 1 - l_fwd_res;
323 template <
class t_pat_iter,
327 class t_sa_sample_strat,
329 class t_alphabet_strat>
338 t_alphabet_strat> & csa_bwd,
352 while (begin < it and r_fwd + 1 - l_fwd > 0)
370 return r_fwd + 1 - l_fwd;
403 template <
class t_pat_iter,
407 class t_sa_sample_strat,
409 class t_alphabet_strat>
416 t_alphabet_strat> & csa_fwd,
431 t_pat_iter it = begin;
432 while (it < end and r_fwd + 1 - l_fwd > 0)
450 return r_fwd + 1 - l_fwd;
466 template <
class t_csa,
class t_pat_iter>
469 if (end - begin > (
typename std::iterator_traits<t_pat_iter>::difference_type)csa.size())
return 0;
475 template <
class t_csx,
class t_pat_iter>
478 typename t_csx::index_category tag;
479 return count(csx, begin, end, tag);
494 template <
class t_csx>
497 typename t_csx::index_category tag;
498 return count(csx, pat.begin(), pat.end(), tag);
513 template <
typename t_csx,
typename t_pat_iter>
514 auto lex_interval(
const t_csx & csx, t_pat_iter begin, t_pat_iter end) -> std::array<typename t_csx::size_type, 2>
516 std::array<typename t_csx::size_type, 2> res;
536 template <
class t_csa,
class t_pat_iter,
class t_rac =
int_vector<64>>
541 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
545 occs =
backward_search(csa, 0, csa.size() - 1, begin, end, occ_begin, occ_end);
547 for (
typename t_csa::size_type i = 0; i < occs; ++i) { occ[i] = csa[occ_begin + i]; }
564 template <
class t_csx,
class t_rac =
int_vector<64>>
565 t_rac
locate(
const t_csx & csx,
const typename t_csx::string_type & pat)
567 typename t_csx::index_category tag;
568 return locate<t_csx, decltype(pat.begin()), t_rac>(csx, pat.begin(), pat.end(), tag);
583 template <
class t_csa,
class t_text_iter>
589 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
592 typename t_csa::extract_category extract_tag;
593 return extract(csa, begin, end, text, extract_tag);
597 template <
class t_csa,
class t_text_iter>
604 assert(end < csa.size());
605 assert(begin <= end);
606 auto steps = end - begin + 1;
609 auto order = csa.isa[end];
613 auto rc = csa.wavelet_tree.inverse_select(order);
616 order = csa.
C[csa.char2comp[c]] + j;
620 return end - begin + 1;
624 template <
class t_csa,
class t_text_iter>
631 assert(end < csa.size());
632 assert(begin <= end);
634 for (
typename t_csa::size_type i = 0, order = csa.isa[begin]; steps != 0; --steps, ++i)
637 if (steps != 0) order = csa.psi[order];
639 return end - begin + 1;
655 template <
class t_csa>
656 typename t_csa::string_type
661 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
csa_tag>::type x =
664 assert(end <= csa.size());
665 assert(begin <= end);
666 typedef typename t_csa::string_type string_type;
667 string_type result(end - begin + 1, (
typename string_type::value_type)0);
668 extract(csa, begin, end, result.begin());
A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree (WT) of the Burrow Wheeler Tran...
const alphabet_type::char2comp_type & char2comp
const alphabet_type::C_type & C
alphabet_type::char_type char_type
size_type size() const
Number of elements in the .
const wavelet_tree_type & wavelet_tree
int_vector ::size_type size_type
size_type size() const noexcept
The number of elements in the int_vector.
csa_wt.hpp contains an implementation of the compressed suffix array based on a wavelet tree.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
t_csa::char_type first_row_symbol(const typename t_csa::size_type i, const t_csa &csa)
Get the symbol at position i in the first row of the sorted suffixes of CSA.
auto lex_interval(const t_csx &csx, t_pat_iter begin, t_pat_iter end) -> std::array< typename t_csx::size_type, 2 >
Returns the lexicographic interval of a pattern in the SA.
csa_wt ::size_type bidirectional_search_backward(const csa_wt< t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat > &csa_fwd, SDSL_UNUSED const csa_wt< t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat > &csa_bwd, typename csa_wt<>::size_type l_fwd, typename csa_wt<>::size_type r_fwd, typename csa_wt<>::size_type l_bwd, typename csa_wt<>::size_type r_bwd, t_pat_iter begin, t_pat_iter end, typename csa_wt<>::size_type &l_fwd_res, typename csa_wt<>::size_type &r_fwd_res, typename csa_wt<>::size_type &l_bwd_res, typename csa_wt<>::size_type &r_bwd_res, SDSL_UNUSED typename std::enable_if< t_wt::lex_ordered, csa_tag >::type x=csa_tag())
Bidirectional search in backward direction.
uint64_t count(const t_k2_treap &treap, k2_treap_ns::point_type p1, k2_treap_ns::point_type p2)
Count how many points are in the rectangle (p1,p2)
csa_wt< t_wt >::size_type bidirectional_search(const csa_wt< t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat > &csa_fwd, typename csa_wt<>::size_type l_fwd, typename csa_wt<>::size_type r_fwd, typename csa_wt<>::size_type l_bwd, typename csa_wt<>::size_type r_bwd, typename csa_wt<>::char_type c, typename csa_wt<>::size_type &l_fwd_res, typename csa_wt<>::size_type &r_fwd_res, typename csa_wt<>::size_type &l_bwd_res, typename csa_wt<>::size_type &r_bwd_res, SDSL_UNUSED typename std::enable_if< t_wt::lex_ordered, csa_tag >::type x=csa_tag())
Bidirectional search for a character c on an interval of the suffix array.
t_csa::size_type forward_search(const t_csa &csa, typename t_csa::size_type l, typename t_csa::size_type r, t_pat_iter begin, t_pat_iter end, typename t_csa::size_type &l_res, typename t_csa::size_type &r_res, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Forward search for a pattern in an -interval in the CSA.
csa_wt< t_wt >::size_type bidirectional_search_forward(SDSL_UNUSED const csa_wt< t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat > &csa_fwd, const csa_wt< t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat > &csa_bwd, typename csa_wt<>::size_type l_fwd, typename csa_wt<>::size_type r_fwd, typename csa_wt<>::size_type l_bwd, typename csa_wt<>::size_type r_bwd, t_pat_iter begin, t_pat_iter end, typename csa_wt<>::size_type &l_fwd_res, typename csa_wt<>::size_type &r_fwd_res, typename csa_wt<>::size_type &l_bwd_res, typename csa_wt<>::size_type &r_bwd_res, SDSL_UNUSED typename std::enable_if< t_wt::lex_ordered, csa_tag >::type x=csa_tag())
Bidirectional search in forward direction.
t_rac locate(const t_csa &csa, t_pat_iter begin, t_pat_iter end, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Calculates all occurrences of a pattern pat in a CSA.
t_csa::size_type extract(const t_csa &csa, typename t_csa::size_type begin, typename t_csa::size_type end, t_text_iter text, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Writes the substring T[begin..end] of the original text T to text[0..end-begin+1].
t_csa::size_type backward_search(const t_csa &csa, typename t_csa::size_type l, typename t_csa::size_type r, typename t_csa::char_type c, typename t_csa::size_type &l_res, typename t_csa::size_type &r_res, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Backward search for a character c in an -interval in the CSA.
int_vector ::size_type size(const range_type &r)
Size of a range.
suffix_array_helper.hpp contains some helper classes for CSTs