SDSL  3.0.0
Succinct Data Structure Library
suffix_array_helper.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
9 #define INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
10 
11 #include <cassert>
12 #include <cstdlib>
13 #include <stdint.h>
14 
15 #include <sdsl/iterators.hpp>
16 
17 namespace sdsl
18 {
19 
21 /*
22  * \param i Position in the first row.
23  * \param csa CSA
24  * \par Time complexity
25  * \f$ \Order{\log \sigma} \f$
26  * TODO: add hinted binary search? Two way binary search?
27  */
28 template <typename t_csa>
29 typename t_csa::char_type first_row_symbol(const typename t_csa::size_type i, const t_csa & csa)
30 {
31  assert(i < csa.size());
32  if (csa.sigma < 16)
33  { //<- if sigma is small search linear
34  typename t_csa::size_type res = 1;
35  while (res < csa.sigma and csa.C[res] <= i) ++res;
36  return csa.comp2char[res - 1];
37  }
38  else
39  {
40  // binary search the character with C
41  typename t_csa::size_type upper_c = csa.sigma,
42  lower_c = 0; // lower_c inclusive, upper_c exclusive
43  typename t_csa::size_type res = 0;
44  do {
45  res = (upper_c + lower_c) / 2;
46  if (i < csa.C[res]) { upper_c = res; }
47  else if (i >= csa.C[res + 1])
48  {
49  lower_c = res + 1;
50  }
51  } while (i < csa.C[res] or i >= csa.C[res + 1]); // i is not in the interval
52  return csa.comp2char[res];
53  }
54 }
55 
56 // psi[] trait
57 template <typename t_csa, bool t_direction>
59 {
60  typedef typename t_csa::value_type value_type;
61  typedef typename t_csa::size_type size_type;
62  static value_type access(const t_csa & csa, size_type i) { return csa.psi[i]; }
63 };
64 
65 // lf[] trait
66 template <typename t_csa>
67 struct traverse_csa_psi_trait<t_csa, false>
68 {
69  typedef typename t_csa::value_type value_type;
70  typedef typename t_csa::size_type size_type;
71  static value_type access(const t_csa & csa, size_type i)
72  {
73  // TODO: in case of a very sparse sampling of SA it may be faster to
74  // use \sigma binary searches on PSI function to determine the
75  // LF values.
76  return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
77  }
78 };
79 
80 template <typename t_csa, bool t_direction>
82 {
83  public:
84  typedef typename t_csa::value_type value_type;
85  typedef typename t_csa::size_type size_type;
86  typedef typename t_csa::difference_type difference_type;
90 
91  private:
92  const t_csa & m_csa;
93 
94  public:
96  traverse_csa_psi(const t_csa & csa_psi)
97  : m_csa(csa_psi)
98  {}
101  : m_csa(tcsa.m_csa)
102  {}
103 
105 
108  {
109  assert(i < size());
111  }
112 
114  size_type size() const { return m_csa.size(); }
115 
117  size_type empty() const { return m_csa.empty(); }
118 
119  const_iterator begin() const { return const_iterator(this, 0); }
120 
122 
125  const_iterator end() const { return const_iterator(this, size()); }
126 };
127 
128 // psi[] trait
129 template <typename t_csa, bool t_direction>
131 {
132  typedef typename t_csa::value_type value_type;
133  typedef typename t_csa::size_type size_type;
134  static value_type access(const t_csa & csa, size_type i)
135  {
136  // \f$\Psi[i] = SA^{-1}[SA[i]+1 \mod n]\f$, where \f$n\f$ is the length of the suffix array SA
137  return csa.isa[(csa[i] + 1) % csa.size()];
138  }
139 };
140 
141 // lf[] trait
142 template <typename t_csa>
143 struct traverse_csa_saisa_trait<t_csa, false>
144 {
145  typedef typename t_csa::value_type value_type;
146  typedef typename t_csa::size_type size_type;
147  static value_type access(const t_csa & csa, size_type i)
148  {
149  // TODO: in case of a very sparse sampling of SA it may be faster to
150  // use \sigma binary searches on PSI function to determine the
151  // LF values.
152  return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
153  }
154 };
155 
158 template <typename t_csa, bool t_direction>
160 {
161  public:
162  typedef typename t_csa::value_type value_type;
163  typedef typename t_csa::size_type size_type;
164  typedef typename t_csa::difference_type difference_type;
168 
169  private:
170  const t_csa & m_csa;
171 
172  public:
174  traverse_csa_saisa(const t_csa & csa)
175  : m_csa(csa)
176  {}
177 
178  // Copy constructor
180  : m_csa(tcsa.m_csa)
181  {}
182 
184 
189  {
190  assert(i < size());
192  }
193 
195  size_type size() const { return m_csa.size(); }
196 
198  size_type empty() const { return m_csa, empty(); }
199 
201 
204  const_iterator begin() const { return const_iterator(this, 0); }
205 
207 
210  const_iterator end() const { return const_iterator(this, size()); }
211 };
212 
214 template <typename t_csa>
216 {
217  public:
218  typedef typename t_csa::char_type value_type;
219  typedef typename t_csa::size_type size_type;
220  typedef typename t_csa::char_type char_type;
221  typedef typename t_csa::difference_type difference_type;
224  typedef typename t_csa::alphabet_category alphabet_category;
225 
226  private:
227  const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on the \f$\Psi\f$ function.
228  public:
230  bwt_of_csa_psi(const t_csa & csa)
231  : m_csa(csa)
232  {}
233 
235 
240  {
241  assert(i < size());
242  size_type pos = m_csa.lf[i];
243  return first_row_symbol(pos, m_csa);
244  }
245 
247 
254  size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
255 
257 
264  size_type select(size_type i, const char_type c) const { return m_csa.select_bwt(i, c); }
265 
267  size_type size() const { return m_csa.size(); }
268 
270  size_type empty() const { return m_csa.empty(); }
271 
273 
276  const_iterator begin() const { return const_iterator(this, 0); }
277 
279 
282  const_iterator end() const { return const_iterator(this, size()); }
283 };
284 
285 // psi[] trait
286 template <typename t_csa, bool t_direction>
288 {
289  typedef typename t_csa::value_type value_type;
290  typedef typename t_csa::char_type char_type;
291  typedef typename t_csa::size_type size_type;
292  static value_type access(const t_csa & csa, size_type i)
293  {
294  char_type c = csa.F[i];
295  return csa.wavelet_tree.select(i - csa.C[csa.char2comp[c]] + 1, c);
296  }
297 };
298 
299 // lf[] trait
300 template <typename t_csa>
301 struct traverse_csa_wt_traits<t_csa, false>
302 {
303  typedef typename t_csa::value_type value_type;
304  typedef typename t_csa::char_type char_type;
305  typedef typename t_csa::size_type size_type;
306  static value_type access(const t_csa & csa, size_type i)
307  {
308  typename t_csa::char_type c;
309  auto rc = csa.wavelet_tree.inverse_select(i);
310  size_type j = rc.first;
311  c = rc.second;
312  return csa.C[csa.char2comp[c]] + j;
313  }
314 };
315 
318 template <typename t_csa, bool t_direction>
320 {
321  public:
322  typedef typename t_csa::value_type value_type;
323  typedef typename t_csa::size_type size_type;
324  typedef typename t_csa::char_type char_type;
325  typedef typename t_csa::difference_type difference_type;
329 
330  private:
331  const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
332  traverse_csa_wt(){}; // disable default constructor
333  public:
335  traverse_csa_wt(const t_csa & csa_wt)
336  : m_csa(csa_wt)
337  {}
339 
344  {
345  assert(i < m_csa.size());
347  }
348 
350  size_type size() const { return m_csa.size(); }
352  size_type empty() const { return m_csa.empty(); }
354  const_iterator begin() const { return const_iterator(this, 0); }
356  const_iterator end() const { return const_iterator(this, size()); }
357 };
358 
359 template <typename t_csa>
361 {
362  public:
363  typedef const typename t_csa::char_type value_type;
364  typedef typename t_csa::size_type size_type;
365  typedef typename t_csa::char_type char_type;
366  typedef typename t_csa::difference_type difference_type;
369  typedef typename t_csa::alphabet_category alphabet_category;
370 
371  private:
372  const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
373  bwt_of_csa_wt(){}; // disable default constructor
374  public:
376  bwt_of_csa_wt(const t_csa & csa_wt)
377  : m_csa(csa_wt)
378  {}
380 
385  {
386  assert(i < size());
387  return m_csa.wavelet_tree[i];
388  }
390  size_type size() const { return m_csa.size(); }
391 
393 
400  size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
401 
403 
410  size_type select(size_type i, const char_type c) const { return m_csa.select(i, c); }
411 
413  size_type empty() const { return m_csa.empty(); }
415  const_iterator begin() const { return const_iterator(this, 0); }
417  const_iterator end() const { return const_iterator(this, size()); }
418 };
419 
420 template <typename t_csa>
422 {
423  public:
424  typedef typename t_csa::value_type value_type;
425  typedef typename t_csa::size_type size_type;
426  typedef typename t_csa::difference_type difference_type;
430 
431  private:
432  const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
433  isa_of_csa_wt(){}; // disable default constructor
434  public:
436  isa_of_csa_wt(const t_csa & csa_wt)
437  : m_csa(csa_wt)
438  {}
439 
441 
444  {
445  assert(i < size());
446  auto sample = m_csa.isa_sample.sample_qeq(i);
447  value_type result = std::get<0>(sample);
448  if (std::get<1>(sample) < i) { i = std::get<1>(sample) + m_csa.size() - i; }
449  else
450  {
451  i = std::get<1>(sample) - i;
452  }
453  while (i--) { result = m_csa.lf[result]; }
454  return result;
455  }
456 
458  size_type size() const { return m_csa.size(); }
460  size_type empty() const { return m_csa.empty(); }
462  const_iterator begin() const { return const_iterator(this, 0); }
464  const_iterator end() const { return const_iterator(this, size()); }
465 };
466 
467 template <typename t_csa>
469 {
470  public:
471  typedef typename t_csa::value_type value_type;
472  typedef typename t_csa::size_type size_type;
473  typedef typename t_csa::difference_type difference_type;
477 
478  private:
479  const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
480  isa_of_csa_psi(){}; // disable default constructor
481  public:
483  isa_of_csa_psi(const t_csa & csa_wt)
484  : m_csa(csa_wt)
485  {}
486 
488 
491  {
492  assert(i < size());
493  // get the rightmost sampled isa value to the left of i
494  auto sample = m_csa.isa_sample.sample_leq(i);
495  value_type result = std::get<0>(sample);
496  i = i - std::get<1>(sample);
497  while (i--) { result = m_csa.psi[result]; }
498  return result;
499  }
501  size_type size() const { return m_csa.size(); }
503  size_type empty() const { return m_csa.empty(); }
505  const_iterator begin() const { return const_iterator(this, 0); }
507  const_iterator end() const { return const_iterator(this, size()); }
508 };
509 
510 template <typename t_csa>
512 {
513  public:
514  typedef const typename t_csa::char_type value_type;
515  typedef typename t_csa::size_type size_type;
516  typedef typename t_csa::difference_type difference_type;
519  typedef typename t_csa::alphabet_category alphabet_category;
520 
521  private:
522  const t_csa & m_csa;
523 
524  public:
526  first_row_of_csa(const t_csa & csa)
527  : m_csa(csa)
528  {}
530 
535  {
536  assert(i < size());
537  return first_row_symbol(i, m_csa);
538  }
540  size_type size() const { return m_csa.size(); }
542  size_type empty() const { return m_csa.empty(); }
544  const_iterator begin() const { return const_iterator(this, 0); }
546  const_iterator end() const { return const_iterator(this, size()); }
547 };
548 
549 template <typename t_csa>
551 {
552  public:
553  typedef typename t_csa::char_type value_type;
554  typedef typename t_csa::size_type size_type;
555  typedef typename t_csa::difference_type difference_type;
558  typedef typename t_csa::alphabet_category alphabet_category;
559 
560  private:
561  const t_csa & m_csa;
562  text_of_csa() {}
563 
564  public:
566  text_of_csa(const t_csa & csa)
567  : m_csa(csa)
568  {}
569 
571 
576  {
577  assert(i < size());
578  return first_row_symbol(m_csa.isa[i], m_csa);
579  }
580 
582  size_type size() const { return m_csa.size(); }
583 
585  size_type empty() const { return m_csa.empty(); }
586 
588 
591  const_iterator begin() const { return const_iterator(this, 0); }
592 
594 
597  const_iterator end() const { return const_iterator(this, size()); }
598 };
599 } // namespace sdsl
600 
601 #endif
A wrapper for the bwt of a compressed suffix array that is based on the function.
size_type size() const
Returns the size of the function.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
random_access_const_iterator< bwt_of_csa_psi > const_iterator
t_csa::alphabet_category alphabet_category
bwt_of_csa_psi(const t_csa &csa)
Constructor.
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type empty() const
Returns if the bwt is empty.
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
t_csa::difference_type difference_type
bwt_of_csa_wt(const t_csa &csa_wt)
Constructor.
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
random_access_const_iterator< bwt_of_csa_wt > const_iterator
const t_csa::char_type value_type
size_type empty() const
Returns if the BWT function is empty.
size_type size() const
Returns the size of the BWT function.
const_iterator begin() const
Returns a const_iterator to the first element.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
t_csa::char_type char_type
t_csa::difference_type difference_type
t_csa::size_type size_type
t_csa::alphabet_category alphabet_category
const_iterator end() const
Returns a const_iterator to the element after the last element.
A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree (WT) of the Burrow Wheeler Tran...
Definition: csa_wt.hpp:56
const_iterator end() const
Returns a const_iterator to the element after the last element.
random_access_const_iterator< first_row_of_csa > const_iterator
first_row_of_csa(const t_csa &csa)
Constructor.
const t_csa::char_type value_type
t_csa::alphabet_category alphabet_category
size_type empty() const
Returns if the F column is empty.
t_csa::difference_type difference_type
value_type operator[](size_type i) const
Calculate F[i].
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the F column.
int_alphabet_tag alphabet_category
isa_of_csa_psi(const t_csa &csa_wt)
Constructor.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type empty() const
Returns if the CSA is empty.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type size() const
Returns the size of the CSA.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_psi > const_iterator
t_csa::difference_type difference_type
t_csa::value_type value_type
size_type empty() const
Returns if the CSA is empty.
size_type size() const
Returns the size of the CSA.
t_csa::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
isa_of_csa_wt(const t_csa &csa_wt)
Constructor.
t_csa::size_type size_type
t_csa::value_type value_type
int_alphabet_tag alphabet_category
const_iterator end() const
Returns a const_iterator to the element after the last element.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_wt > const_iterator
Generic iterator for a random access container.
Definition: iterators.hpp:24
value_type operator[](size_type i) const
Character at index of the original text.
size_type empty() const
Returns if text text has size 0.
t_csa::difference_type difference_type
t_csa::char_type value_type
t_csa::alphabet_category alphabet_category
text_of_csa(const t_csa &csa)
Constructor.
size_type size() const
Returns the size of the original text.
random_access_const_iterator< text_of_csa > const_iterator
const_iterator begin() const
Returns a const_iterator to the first element.
const_iterator end() const
Returns a const_iterator to the element after the last element.
t_csa::size_type size_type
random_access_const_iterator< traverse_csa_psi > const_iterator
t_csa::difference_type difference_type
size_type size() const
Returns the size of the function.
const_iterator begin() const
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type empty() const
Returns if the function is empty.
value_type operator[](size_type i) const
Calculate the or value at position i.
traverse_csa_psi(const traverse_csa_psi &tcsa)
Copy constructor.
int_alphabet_tag alphabet_category
t_csa::value_type value_type
traverse_csa_psi(const t_csa &csa_psi)
Constructor.
A helper class for the function for (compressed) suffix arrays which provide also the inverse suffix...
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
traverse_csa_saisa(const traverse_csa_saisa &tcsa)
size_type empty() const
Returns if the function is empty.
traverse_csa_saisa(const t_csa &csa)
Constructor.
size_type size() const
Returns the size of the function.
value_type operator[](size_type i) const
Calculate the value at position i.
t_csa::difference_type difference_type
random_access_const_iterator< traverse_csa_saisa > const_iterator
A wrapper class for the and LF function for (compressed) suffix arrays that are based on a wavelet t...
value_type operator[](size_type i) const
Calculate the value at position i.
t_csa::difference_type difference_type
t_csa::value_type value_type
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
traverse_csa_wt(const t_csa &csa_wt)
Constructor.
size_type size() const
Returns the size of the function.
int_alphabet_tag alphabet_category
random_access_const_iterator< traverse_csa_wt > const_iterator
size_type empty() const
Returns if the function is empty.
iterators.hpp contains an generic iterator for random access containers.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
t_csa::char_type first_row_symbol(const typename t_csa::size_type i, const t_csa &csa)
Get the symbol at position i in the first row of the sorted suffixes of CSA.
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)