SDSL  3.0.0
Succinct Data Structure Library
bp_support_gg.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_BP_SUPPORT_GG
9 #define INCLUDED_SDSL_BP_SUPPORT_GG
10 
11 #include <map>
12 #include <set>
13 #include <stack>
14 #include <stdexcept>
15 #include <utility>
16 
18 #include <sdsl/int_vector.hpp>
20 #include <sdsl/rank_support.hpp>
21 #include <sdsl/select_support.hpp>
22 #include <sdsl/util.hpp>
23 
24 namespace sdsl
25 {
26 
28 
53 // TODO: can rrr_vector replace nearest_neighbour_dictionary?
54 template <class t_nnd = nearest_neighbour_dictionary<30>,
55  class t_rank = rank_support_v5<>,
56  class t_select = select_support_mcl<>,
57  uint32_t t_bs = 840>
59 {
60  static_assert(t_bs > 2, "bp_support_gg: block size must be greater than 2.");
61 
62  public:
64  typedef t_nnd nnd_type;
65  typedef t_rank rank_type;
66  typedef t_select select_type;
68 
69  private:
70  const bit_vector * m_bp = nullptr; // balanced parentheses sequence
71  rank_type m_rank_bp; // rank support for m_bp => see excess() and rank()
72  select_type m_select_bp; // select support for m_bp => see select()
73 
74  nnd_type m_nnd; // nearest neighbour dictionary for pioneers bit_vector
75 
76  bit_vector m_pioneer_bp; // pioneer sequence
77  std::unique_ptr<bp_support_type> m_pioneer_bp_support = nullptr;
78 
79  size_type m_size = 0;
80  size_type m_blocks = 0; // number of blocks
81 
82  public:
83  const rank_type & bp_rank = m_rank_bp;
84  const select_type & bp_select = m_select_bp;
85 
86  bp_support_gg() = default;
87 
89  explicit bp_support_gg(const bit_vector * bp)
90  : m_bp(bp)
91  , m_size(bp == nullptr ? 0 : bp->size())
92  , m_blocks((m_size + t_bs - 1) / t_bs)
93  , bp_rank(m_rank_bp)
94  , bp_select(m_select_bp)
95  {
96  if (bp == nullptr)
97  { // -> m_bp == nullptr
98  return;
99  }
100 
101  util::init_support(m_rank_bp, bp);
102  util::init_support(m_select_bp, bp);
103  {
104  bit_vector pioneer;
105  pioneer = calculate_pioneers_bitmap_succinct(*m_bp, t_bs);
106  m_nnd = nnd_type(pioneer);
107  }
108 
109  m_pioneer_bp.resize(m_nnd.ones());
110  if (m_nnd.ones() > 0 and m_nnd.ones() == m_bp->size())
111  { // m_bp != nullptr see above
112  throw std::logic_error(util::demangle(typeid(this).name()) +
113  ": recursion in the construction does not terminate!");
114  }
115 
116  for (size_type i = 1; i <= m_nnd.ones(); ++i) { m_pioneer_bp[i - 1] = (*m_bp)[m_nnd.select(i)]; }
117 
118  if (m_bp->size() > 0)
119  { // m_bp != nullptr see above
120  m_pioneer_bp_support = std::unique_ptr<bp_support_type>(new bp_support_type(&m_pioneer_bp));
121  }
122  }
123 
126  : m_rank_bp(v.m_rank_bp)
127  , m_select_bp(v.m_select_bp)
128  , m_nnd(v.m_nnd)
129  , m_pioneer_bp(v.m_pioneer_bp)
130  , m_size(v.m_size)
131  , m_blocks(v.m_blocks)
132  {
133 
134  m_rank_bp.set_vector(m_bp);
135  m_select_bp.set_vector(m_bp);
136 
137  m_pioneer_bp_support.reset(nullptr);
138  if (v.m_pioneer_bp_support != nullptr)
139  {
140  m_pioneer_bp_support.reset(new bp_support_type(*(v.m_pioneer_bp_support)));
141  m_pioneer_bp_support->set_vector(&m_pioneer_bp);
142  }
143  }
144 
146  bp_support_gg(bp_support_gg && bp_support) { *this = std::move(bp_support); }
147 
149  ~bp_support_gg() = default;
150 
153  {
154  if (this != &v)
155  {
156  bp_support_gg tmp(v);
157  *this = std::move(tmp);
158  }
159  return *this;
160  }
161 
164  {
165  if (this != &bp_support)
166  {
167  m_bp = bp_support.m_bp;
168  bp_support.m_bp = nullptr;
169 
170  m_rank_bp = std::move(bp_support.m_rank_bp);
171  m_rank_bp.set_vector(m_bp);
172  m_select_bp = std::move(bp_support.m_select_bp);
173  m_select_bp.set_vector(m_bp);
174 
175  m_nnd = std::move(bp_support.m_nnd);
176 
177  m_size = bp_support.m_size;
178  m_blocks = bp_support.m_blocks;
179 
180  m_pioneer_bp = std::move(bp_support.m_pioneer_bp);
181 
182  m_pioneer_bp_support.reset(nullptr);
183  if (bp_support.m_pioneer_bp_support != nullptr)
184  {
185  std::swap(m_pioneer_bp_support, bp_support.m_pioneer_bp_support);
186  m_pioneer_bp_support->set_vector(&m_pioneer_bp);
187  }
188  }
189  return *this;
190  }
191 
192  void set_vector(const bit_vector * bp)
193  {
194  m_bp = bp;
195  m_rank_bp.set_vector(bp);
196  m_select_bp.set_vector(bp);
197  }
198 
202  inline size_type excess(size_type i) const { return (m_rank_bp(i + 1) << 1) - i - 1; }
203 
207  size_type rank(size_type i) const { return m_rank_bp(i + 1); }
208 
214  size_type select(size_type i) const { return m_select_bp(i); }
215 
223  {
224  assert(i < m_size);
225  if (!(*m_bp)[i])
226  { // if there is a closing parenthesis at index i return i
227  return i;
228  }
229  size_type mi = 0; // match for i
230  if ((mi = near_find_closing(*m_bp, i + 1, 1, t_bs)) == i)
231  {
232  const size_type i_ = m_nnd.rank(i + 1) - 1; // lemma that this gives us an opening pioneer
233  assert(m_pioneer_bp[i_] == 1); // assert that i2 is an opening parenthesis
234  size_type mi_ = m_pioneer_bp_support->find_close(i_);
235  assert(m_pioneer_bp[mi_] == 0);
236  mi = m_nnd.select(mi_ + 1); /* matching pioneer position in bp */
237  assert((*m_bp)[mi] == 0);
238  mi = (mi / t_bs) * t_bs;
239  size_type epb2 = excess(mi - 1); // excess of first parenthesis in the pioneer block
240  const size_type ei = excess(i); // excess at position i
241  /* invariant: epb >= ei-1 */ // assert( epb+1 >= ei );
242  return near_find_closing(*m_bp, mi, epb2 - ei + 1, t_bs);
243  }
244  return mi;
245  }
246 
248 
255  {
256  assert(i < m_size);
257  if ((*m_bp)[i])
258  { // if there is a opening parenthesis
259  return i; // return i
260  }
261  size_type mi = 0; // match for i
262  if ((mi = near_find_opening(*m_bp, i - 1, 1, t_bs)) == i)
263  {
264  const size_type i_ = m_nnd.rank(i); // lemma that this gives us an closing pioneer
265  assert(m_pioneer_bp[i_] == 0); // assert that i' is an opening parenthesis
266  const size_type mi_ = m_pioneer_bp_support->find_open(i_);
267  assert(m_pioneer_bp[mi_] == 1);
268  mi = m_nnd.select(mi_ + 1); /* matching pioneer position in bp */
269  assert((*m_bp)[mi] == 1);
270  mi = (mi / t_bs) * t_bs + t_bs - 1;
271  assert(mi < i);
272  size_type epb2 = excess(mi + 1); // excess of last parenthesis in the pioneer block
273  const size_type ei = excess(i); // excess at position i
274  /*invariant: epb >= ei+1*/ // assert( epb >= ei+1 );
275  return near_find_opening(*m_bp, mi, epb2 - ei + 1 - 2 * ((*m_bp)[mi + 1]), t_bs);
276  }
277  return mi;
278  }
279 
281 
287  {
288  assert(i < m_size);
289  if (!(*m_bp)[i])
290  { // if there is closing parenthesis at position i
291  return find_open(i);
292  }
293  const size_type exi = excess(i);
294  if (exi == 1) // if i is not enclosed by a parentheses pair..
295  return size();
296  size_type ei; // enclose for i
297  if ((ei = near_find_opening(*m_bp, i - 1, 1, t_bs)) == i)
298  {
299  const size_type i_ = m_nnd.rank(i); // next parenthesis in the pioneer bitmap
300  size_type ei_; // enclose for i'
301  ei_ = m_pioneer_bp_support->enclose(i_);
302  assert(m_pioneer_bp[ei_] == 1);
303  ei = m_nnd.select(ei_ + 1);
304  assert((*m_bp)[ei] == 1);
305  ei = (ei / t_bs) * t_bs + t_bs - 1;
306  assert(ei < i);
307  size_type epb2 = excess(ei + 1); // excess of last parenthesis in the pioneer block
308  /* invariant epb+1 >= exi */ // assert( epb+1 >= exi );
309  return near_find_opening(*m_bp, ei, epb2 - exi + 1 + 2 * ((*m_bp)[ei + 1] == 0), t_bs);
310  }
311  return ei;
312  }
313 
315 
325  size_type rr_enclose(const size_type i, const size_type j) const
326  {
327  assert(j < m_size);
328  assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
329  const size_type mip1 = find_close(i) + 1;
330  if (mip1 >= j) return size();
331  return rmq_open(mip1, j);
332  }
333 
342  size_type rmq_open(const size_type l, const size_type r) const
343  {
344  if (l >= r) return size();
345  size_type min_ex_pos = r;
346 
347  if (l / t_bs == r / t_bs) { min_ex_pos = near_rmq_open(*m_bp, l, r); }
348  else
349  { // parentheses pair does not start in the same block
350  // note: l and r are not in the same block
351  size_type k, ex; // helper variables
352  size_type min_ex = excess(r) + 2 * ((*m_bp)[r] == 0); // minimal excess
353 
354  // 1.2
355  size_type l_ = m_nnd.rank(l); // l_ inclusive
356  size_type r_ = m_nnd.rank(r); // r_ exclusive
357 
358  size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_);
359  if (min_ex_pos_ < r_)
360  {
361  k = m_nnd.select(min_ex_pos_ + 1);
362  min_ex = excess(k);
363  min_ex_pos = k;
364  }
365  else
366  {
367  // 1.1
368  k = near_rmq_open(*m_bp, (r / t_bs) * t_bs, r);
369  if (k < r)
370  {
371  assert(excess(k) < min_ex);
372  min_ex = excess(k);
373  min_ex_pos = k;
374  }
375  }
376  // 1.3
377  k = near_rmq_open(*m_bp, l, (l / t_bs + 1) * t_bs);
378  if (k < (l / t_bs + 1) * t_bs and (ex = excess(k)) < min_ex)
379  {
380  min_ex = ex;
381  min_ex_pos = k;
382  }
383  }
384  // 1.4
385  if (min_ex_pos < r)
386  return min_ex_pos;
387  else
388  return size();
389  }
390 
392 
400  {
401  assert(j > i and j < m_size);
402  assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
403  size_type mi = find_close(i); // matching parenthesis to i
404  assert(mi > i and mi < j);
405  assert(find_close(j) > j);
406  size_type k = enclose(j);
407  if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j.
408  return m_size;
409  size_type kk;
410  do {
411  kk = k;
412  k = enclose(k);
413  } while (k != m_size and k > mi);
414  return kk;
415  }
416 
418 
423  {
424  assert(l <= r);
425  size_type m = rmq_open(l, r + 1);
426  if (m == size())
427  return r;
428  else if (m == l)
429  return l;
430  else
431  { // m>l and m<=r
432  assert(0 == (*m_bp)[m - 1]);
433  return m - 1;
434  }
435  }
436 
438 
444  {
445  assert(j > i);
446  assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
447  size_type k = rr_enclose(i, j);
448  if (k == size())
449  return enclose(j);
450  else
451  return enclose(k);
452  }
453 
455 
458  {
459  assert(i < m_size);
460  if (!i) return 0;
461  size_type ones = m_rank_bp(i);
462  if (ones)
463  { // ones > 0
464  assert(m_select_bp(ones) < i);
465  return i - m_select_bp(ones) - 1;
466  }
467  else
468  {
469  return i;
470  }
471  }
472 
476  size_type size() const { return m_size; }
477 
479 
483  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
484  {
485  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
486  size_type written_bytes = 0;
487  written_bytes += write_member(m_size, out, child, "size");
488  written_bytes += write_member(m_blocks, out, child, "block_cnt");
489 
490  written_bytes += m_rank_bp.serialize(out, child, "bp_rank");
491  written_bytes += m_select_bp.serialize(out, child, "bp_select");
492  written_bytes += m_nnd.serialize(out, child, "nearest_neighbour_dictionary");
493 
494  written_bytes += m_pioneer_bp.serialize(out, child, "pioneer_bp");
495  if (m_bp != nullptr and m_bp->size() > 0)
496  written_bytes += m_pioneer_bp_support->serialize(out, child, "pioneer_bp_support");
497  structure_tree::add_size(child, written_bytes);
498  return written_bytes;
499  }
500 
502 
506  void load(std::istream & in, const bit_vector * bp)
507  {
508  m_bp = bp;
509  read_member(m_size, in);
510  read_member(m_blocks, in);
511 
512  m_rank_bp.load(in, m_bp);
513  m_select_bp.load(in, m_bp);
514  m_nnd.load(in);
515 
516  m_pioneer_bp.load(in);
517  m_pioneer_bp_support.reset(nullptr);
518  if (m_bp != nullptr and m_bp->size() > 0)
519  {
520  m_pioneer_bp_support.reset(new bp_support_type());
521  m_pioneer_bp_support->load(in, &m_pioneer_bp);
522  }
523  }
524 
525  template <typename archive_t>
526  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
527  {
528  ar(CEREAL_NVP(m_size));
529  ar(CEREAL_NVP(m_blocks));
530  ar(CEREAL_NVP(m_rank_bp));
531  ar(CEREAL_NVP(m_select_bp));
532  ar(CEREAL_NVP(m_nnd));
533  ar(CEREAL_NVP(m_pioneer_bp));
534  ar(CEREAL_NVP(m_pioneer_bp_support));
535  }
536 
537  template <typename archive_t>
538  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
539  {
540  ar(CEREAL_NVP(m_size));
541  ar(CEREAL_NVP(m_blocks));
542  ar(CEREAL_NVP(m_rank_bp));
543  ar(CEREAL_NVP(m_select_bp));
544  ar(CEREAL_NVP(m_nnd));
545  ar(CEREAL_NVP(m_pioneer_bp));
546  ar(CEREAL_NVP(m_pioneer_bp_support));
547  if (m_pioneer_bp_support != nullptr) { m_pioneer_bp_support->set_vector(&m_pioneer_bp); }
548  }
549 
551  bool operator==(bp_support_gg const & other) const noexcept
552  {
553  return (m_size == other.m_size) && (m_blocks == other.m_blocks) && (m_rank_bp == other.m_rank_bp) &&
554  (m_select_bp == other.m_select_bp) && (m_nnd == other.m_nnd) && (m_pioneer_bp == other.m_pioneer_bp) &&
555  ((m_pioneer_bp_support == other.m_pioneer_bp_support) ||
556  (*m_pioneer_bp_support == *other.m_pioneer_bp_support));
557  }
558 
560  bool operator!=(bp_support_gg const & other) const noexcept { return !(*this == other); }
561 };
562 
563 } // namespace sdsl
564 
565 #endif
bp_support_algorithm.hpp contains algorithms for balanced parentheses sequences.
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A class that provides support for bit_vectors that represent a BP sequence.
size_type rr_enclose(const size_type i, const size_type j) const
Range restricted enclose operation.
size_type preceding_closing_parentheses(size_type i) const
Return the number of zeros which procede position i in the balanced parentheses sequence.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the bp_support_gg to a stream.
const select_type & bp_select
size_type size() const
The size of the supported balanced parentheses sequence.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type rmq_open(const size_type l, const size_type r) const
Search the interval [l,r-1] for an opening parenthesis, say i, such that find_close(i) >= r.
void set_vector(const bit_vector *bp)
size_type double_enclose(size_type i, size_type j) const
The double enclose operation.
size_type find_open(size_type i) const
Calculate the matching opening parenthesis.
size_type rank(size_type i) const
Returns the number of opening parentheses up to and including index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type rr_enclose_naive(size_type i, size_type j) const
The range restricted enclose operation.
size_type find_close(size_type i) const
Calculate the index of the matching closing parenthesis to the parenthesis at index i.
bp_support_gg< nnd_type, rank_type, select_support_scan<>, t_bs > bp_support_type
size_type rmq(size_type l, size_type r) const
The range minimum query (rmq) returns the index of the parenthesis with minimal excess in the range .
bp_support_gg & operator=(const bp_support_gg &v)
Assignment operator.
const rank_type & bp_rank
size_type excess(size_type i) const
Calculates the excess value at index i.
void load(std::istream &in, const bit_vector *bp)
Load the bp_support_gg for a bit_vector v.
bp_support_gg()=default
bp_support_gg & operator=(bp_support_gg &&bp_support)
Assignment Move operator.
bp_support_gg(const bit_vector *bp)
Constructor.
bool operator==(bp_support_gg const &other) const noexcept
Equality operator.
bit_vector::size_type size_type
bool operator!=(bp_support_gg const &other) const noexcept
Inequality operator.
bp_support_gg(bp_support_gg &&bp_support)
Move constructor.
size_type enclose(size_type i) const
Calculate enclose.
~bp_support_gg()=default
Destructor.
size_type select(size_type i) const
Returns the index of the i-th opening parenthesis.
bp_support_gg(const bp_support_gg &v)
Copy constructor.
int_vector_size_type size_type
Definition: int_vector.hpp:266
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
uint64_t near_find_closing(const bit_vector &bp, uint64_t i, uint64_t closings, const uint64_t block_size)
uint64_t near_rmq_open(const bit_vector &bp, const uint64_t begin, const uint64_t end)
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:970
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
uint64_t near_find_opening(const bit_vector &bp, uint64_t i, const uint64_t openings, const uint64_t block_size)
bit_vector calculate_pioneers_bitmap_succinct(const bit_vector &bp, uint64_t block_size)
Space-efficient version of calculate_pioneers_bitmap.
nearest_neighbour_dictionary.hpp contains a class which supports rank/select for sparse populated sds...
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.