SDSL  3.0.0
Succinct Data Structure Library
suffix_tree_helper.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 #ifndef INCLUDED_SDSL_SUFFIX_TREE_HELPER
5 #define INCLUDED_SDSL_SUFFIX_TREE_HELPER
6 
7 #include <cassert>
8 #include <cstdlib>
9 #include <stack>
10 #include <stdint.h>
11 
12 #include <sdsl/iterators.hpp>
15 
16 namespace sdsl
17 {
18 
19 template <class t_cst>
20 class cst_node_child_proxy_iterator : public std::iterator<std::forward_iterator_tag, typename t_cst::node_type>
21 {
22  public:
23  using node_type = typename t_cst::node_type;
25  using const_reference = const node_type;
27 
28  private:
29  const t_cst * m_cst;
30  node_type m_cur_node;
31 
32  public:
34  : m_cst(nullptr){};
36  : m_cst(cst)
37  , m_cur_node(v)
38  {}
40  : m_cst(it.m_cst)
41  , m_cur_node(it.m_cur_node)
42  {}
43 
44  public:
45  const_reference operator*() const { return m_cur_node; }
47  {
48  m_cur_node = m_cst->sibling(m_cur_node);
49  return *this;
50  }
52  {
53  iterator_type it = *this;
54  ++(*this);
55  return it;
56  }
57  bool operator==(const iterator_type & it) const { return it.m_cur_node == m_cur_node; }
58  bool operator!=(const iterator_type & it) const { return !(*this == it); }
59 };
60 
61 template <class t_cst>
63 {
64  public: // types
66  using node_type = typename t_cst::node_type;
67  using size_type = typename t_cst::size_type;
68 
69  private: // data
70  node_type m_parent;
71  const t_cst * m_cst;
72 
73  public: // constructors
75  explicit cst_node_child_proxy(const t_cst * cst, node_type v)
76  : m_parent(v)
77  , m_cst(cst){};
79  : m_parent(p.m_parent)
80  , m_cst(p.m_cst){};
81 
82  public: // methods
84  {
85  return m_cst->select_child(m_parent, i + 1);
86  } // enumeration starts with 1 not 0
87  size_type size() { return m_cst->degree(m_parent); }
88  iterator_type begin() const { return iterator_type(m_cst, m_cst->select_child(m_parent, 1)); }
89  iterator_type end() const { return iterator_type(m_cst, m_cst->root()); }
90 };
91 
93 
102 template <class t_rac>
103 void construct_supercartesian_tree_bp(const t_rac & vec, bit_vector & bp, const bool minimum = true)
104 {
105  typedef typename t_rac::size_type size_type;
106  bp.resize(2 * vec.size()); // resize bit vector for balanaced parantheses to 2 n bits
107  util::set_to_value(bp, 0);
108  std::stack<typename t_rac::value_type> vec_stack;
109 
110  size_type k = 0;
111  for (size_type i = 0; i < vec.size(); ++i)
112  {
113  typename t_rac::value_type l = vec[i];
114  if (minimum)
115  {
116  while (vec_stack.size() > 0 and l < vec_stack.top())
117  {
118  vec_stack.pop();
119  ++k;
120  /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
121  }
122  }
123  else
124  {
125  while (vec_stack.size() > 0 and l > vec_stack.top())
126  {
127  vec_stack.pop();
128  ++k;
129  /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
130  }
131  }
132  vec_stack.push(l);
133  bp[k++] = 1; // writing an opening parenthesis
134  }
135  while (vec_stack.size() > 0)
136  {
137  vec_stack.pop();
138  bp[k++] = 0; // writing a closing parenthesis
139  }
140  assert(k == 2 * vec.size());
141 }
142 
144 
153 template <class t_rac>
154 bit_vector construct_supercartesian_tree_bp_succinct(const t_rac & vec, const bool minimum = true)
155 {
156  typedef typename t_rac::size_type size_type;
157  bit_vector bp(2 * vec.size(), 0); // initialize result
158  if (vec.size() > 0)
159  {
160  sorted_stack_support vec_stack(vec.size());
161 
162  size_type k = 0;
163  if (minimum)
164  {
165  bp[k++] = 1;
166  for (size_type i = 1; i < vec.size(); ++i)
167  {
168  if (vec[i] < vec[i - 1])
169  {
170  ++k;
171  while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()])
172  {
173  vec_stack.pop();
174  ++k; // writing a closing parenthesis, bp is already initialized to zero
175  }
176  }
177  else
178  {
179  vec_stack.push(i - 1); // "lazy stack" trick: speed-up approx. 25%
180  }
181  bp[k++] = 1; // writing an opening parenthesis
182  }
183  }
184  else
185  {
186  // no "lazy stack" trick used here
187  for (size_type i = 0; i < vec.size(); ++i)
188  {
189  while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()])
190  {
191  vec_stack.pop();
192  ++k;
193  /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
194  }
195  vec_stack.push(i);
196  bp[k++] = 1; // writing an opening parenthesis
197  }
198  }
199  }
200  return bp;
201 }
202 
204 
212 template <uint8_t t_width>
214 {
216  bit_vector bp(2 * lcp_buf.size(), 0); // initialize result
217  if (lcp_buf.size() > 0)
218  {
219  sorted_multi_stack_support vec_stack(lcp_buf.size());
220 
221  size_type k = 0;
222  if (minimum)
223  {
224  bp[k++] = 1;
225  size_type last = lcp_buf[0];
226  for (size_type i = 1, x; i < lcp_buf.size(); ++i)
227  {
228  x = lcp_buf[i];
229  if (x < last)
230  {
231  ++k; // writing a closing parenthesis for last
232  while (!vec_stack.empty() and x < vec_stack.top())
233  {
234  vec_stack.pop();
235  ++k; // writing a closing parenthesis, bp is already initialized to zeros
236  }
237  }
238  else
239  {
240  vec_stack.push(last); // "lazy stack" trick: speed-up about 25 %
241  }
242  bp[k++] = 1; // writing an opening parenthesis
243  last = x;
244  }
245  }
246  else
247  {
248  // no "lazy stack" trick use here
249  for (size_type i = 0, x; i < lcp_buf.size(); ++i)
250  {
251  x = lcp_buf[i];
252  while (!vec_stack.empty() and x > vec_stack.top())
253  {
254  vec_stack.pop();
255  ++k; // writing a closing parenthesis, bp is already initialized to zeros
256  }
257  vec_stack.push(x);
258  bp[k++] = 1; // writing an opening parenthesis
259  }
260  }
261  }
262  return bp;
263 }
264 
267 
276 template <uint8_t t_width>
278  bit_vector & bp,
279  bit_vector & bp_fc,
280  const bool minimum = true)
281 {
283  size_type n = lcp_buf.size();
284  bp.resize(2 * n); // resize bit vector for balanced parentheses to 2 n bits
285  bp_fc.resize(n);
286  if (n == 0) // if n == 0 we are done
287  return 0;
288  size_type fc_cnt = 0; // first child counter
289  util::set_to_value(bp, 0);
290  util::set_to_value(bp_fc, 0);
291  sorted_multi_stack_support vec_stack(n);
292 
293  size_type k = 0;
294  size_type k_fc = 0; // first child index
295  if (minimum)
296  {
297  // no "lazy stack" trick used here
298  for (size_type i = 0, x; i < n; ++i)
299  {
300  x = lcp_buf[i];
301  while (!vec_stack.empty() and x < vec_stack.top())
302  {
303  if (vec_stack.pop())
304  {
305  bp_fc[k_fc] = 1;
306  ++fc_cnt;
307  }
308  ++k; // writing a closing parenthesis, bp is already initialized to zeros
309  ++k_fc; // write a bit in first_child
310  }
311  vec_stack.push(x);
312  bp[k++] = 1; // writing an opening parenthesis
313  }
314  }
315  else
316  {
317  // no "lazy stack" trick used here
318  for (size_type i = 0, x; i < n; ++i)
319  {
320  x = lcp_buf[i];
321  while (!vec_stack.empty() and x > vec_stack.top())
322  {
323  if (vec_stack.pop())
324  {
325  bp_fc[k_fc] = 1;
326  ++fc_cnt;
327  }
328  ++k; // writing a closing parenthesis, bp is already initialized to zeros
329  ++k_fc; // write a bit in first_child
330  }
331  vec_stack.push(x);
332  bp[k++] = 1; // writing an opening parenthesis
333  }
334  }
335  while (!vec_stack.empty())
336  {
337  if (vec_stack.pop())
338  {
339  bp_fc[k_fc] = 1;
340  ++fc_cnt;
341  }
342  // writing a closing parenthesis in bp, not necessary as bp is initialized with zeros
343  ++k;
344  ++k_fc;
345  }
346  return fc_cnt;
347 }
348 
349 // Gets ISA[SA[idx]+d]
350 // d = depth of the character 0 = first position
351 template <class t_csa>
352 typename t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa & csa)
353 {
354  if (d == 0) return idx;
355  // if we have to apply \f$\LF\f$ or \f$\Phi\f$ more
356  // than 2*d times to calc csa(csa[idx]+d), we opt to
357  // apply \f$ \Phi \f$ d times
358  if (csa.sa_sample_dens + csa.isa_sample_dens > 2 * d + 2)
359  {
360  for (typename t_csa::size_type i = 0; i < d; ++i) idx = csa.psi[idx];
361  return idx;
362  }
363  return csa.isa[csa[idx] + d];
364 }
365 
366 // has_id<X>::value is true if class X has
367 // implement method id
368 // Adapted solution from jrok's proposal:
369 // http://stackoverflow.com/questions/87372/check-if-a-class-has-a-member-function-of-a-given-signature
370 template <typename t_wt>
371 struct has_id
372 {
373  template <typename T>
374  static constexpr auto
375  check(T *) -> typename std::is_same<decltype(std::declval<T>().id(std::declval<typename T::node_type &>())),
376  typename T::size_type>::type
377  {
378  return std::true_type();
379  }
380  template <typename>
381  static constexpr std::false_type check(...)
382  {
383  return std::false_type();
384  }
385  typedef decltype(check<t_wt>(nullptr)) type;
386  static constexpr bool value = type::value;
387 };
388 } // namespace sdsl
389 
390 #endif
bool operator!=(const iterator_type &it) const
bool operator==(const iterator_type &it) const
cst_node_child_proxy_iterator(const t_cst *cst, node_type v)
cst_node_child_proxy_iterator(const iterator_type &it)
typename t_cst::size_type size_type
typename t_cst::node_type node_type
iterator_type begin() const
cst_node_child_proxy(const cst_node_child_proxy &p)
cst_node_child_proxy(const t_cst *cst, node_type v)
cst_node_child_proxy_iterator< t_cst > iterator_type
node_type operator[](size_type i) const
uint64_t size() const
Returns the number of elements currently stored.
int_vector_size_type size_type
Definition: int_vector.hpp:266
size_type size() const noexcept
The number of elements in the int_vector.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
Stack which contains elements from [0..n] in sorted order. Duplicates are possible.
bool empty() const
Returns if the stack is empty.
size_type top() const
Returns the topmost index on the stack.
bool pop()
Pop the topmost index of the stack.
bool push(size_type x)
Push the index x of vector vec onto the stack.
A stack which contains strictly increasing numbers in the range from to .
size_type top() const
Returns the topmost index on the stack.
void pop()
Pop the topmost index of the stack.
void push(size_type x)
Push the index x of vector vec onto the stack.
size_type size() const
Returns the number of element is the stack.
iterators.hpp contains an generic iterator for random access containers.
int_vector ::size_type size_type
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:566
Namespace for the succinct data structure library.
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa &csa)
bit_vector construct_supercartesian_tree_bp_succinct(const t_rac &vec, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer< t_width > &lcp_buf, bit_vector &bp, bit_vector &bp_fc, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
void construct_supercartesian_tree_bp(const t_rac &vec, bit_vector &bp, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
sorted_multi_stack_support.hpp contains a data structure for a stack which contains elements from [0....
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().id(std::declval< typename T::node_type & >())), typename T::size_type >::type
static constexpr bool value
decltype(check< t_wt >(nullptr)) typedef type