SDSL  3.0.0
Succinct Data Structure Library
k2_treap.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_K2_TREAP
9 #define INCLUDED_SDSL_K2_TREAP
10 
11 #include <algorithm>
12 #include <climits>
13 #include <tuple>
14 #include <vector>
15 
16 #include <sdsl/bits.hpp>
18 #include <sdsl/k2_treap_helper.hpp>
19 #include <sdsl/vectors.hpp>
20 
22 namespace sdsl
23 {
24 
26 
46 template <uint8_t t_k,
47  typename t_bv = bit_vector,
48  typename t_rank = typename t_bv::rank_1_type,
49  typename t_max_vec = dac_vector<>>
50 class k2_treap
51 {
52  static_assert(t_k > 1, "t_k has to be larger than 1.");
53  static_assert(t_k <= 16, "t_k has to be smaller than 17.");
54 
55  public:
60 
61  enum
62  {
63  k = t_k
64  };
65 
66  private:
67  uint8_t m_t = 0;
68  t_bv m_bp;
69  t_rank m_bp_rank;
70  t_max_vec m_maxval;
71  std::vector<int_vector<>> m_coord;
72  int_vector<64> m_level_idx;
73 
74  template <typename t_tv>
75  uint8_t get_t(const t_tv & v)
76  {
77  using namespace k2_treap_ns;
78  if (v.size() == 0) { return 0; }
79  using t_e = typename t_tv::value_type;
80  auto tupmax = [](t_e a) { return std::max(std::get<0>(a), std::get<1>(a)); };
81  auto max_it = std::max_element(std::begin(v), std::end(v), [&](t_e a, t_e b) { return tupmax(a) < tupmax(b); });
82  uint64_t x = tupmax(*max_it);
83  uint8_t res = 0;
84  while (precomp<t_k>::exp(res) <= x) { ++res; }
85  return res;
86  }
87 
88  public:
89  uint8_t & t = m_t;
90 
91  k2_treap() = default;
92 
93  k2_treap(const k2_treap & tr)
94  : m_t(tr.m_t)
95  , m_bp(tr.m_bp)
96  , m_bp_rank(tr.m_bp_rank)
97  , m_maxval(tr.m_maxval)
98  , m_coord(tr.m_coord)
99  , m_level_idx(tr.m_level_idx)
100  {
101  m_bp_rank.set_vector(&m_bp);
102  }
103 
104  k2_treap(k2_treap && tr) { *this = std::move(tr); }
105 
108  {
109  if (this != &tr)
110  {
111  m_t = tr.m_t;
112  m_bp = std::move(tr.m_bp);
113  m_bp_rank = std::move(tr.m_bp_rank);
114  m_bp_rank.set_vector(&m_bp);
115  m_maxval = std::move(tr.m_maxval);
116  m_coord = std::move(tr.m_coord);
117  m_level_idx = std::move(tr.m_level_idx);
118  }
119  return *this;
120  }
121 
124  {
125  if (this != &tr)
126  {
127  k2_treap tmp(tr);
128  *this = std::move(tmp);
129  }
130  return *this;
131  }
132 
134  size_type size() const { return m_maxval.size(); }
135 
137  int_vector_buffer<> & buf_y,
138  int_vector_buffer<> & buf_w,
139  std::string temp_dir)
140  {
141  using namespace k2_treap_ns;
142  typedef int_vector_buffer<> * t_buf_p;
143  std::vector<t_buf_p> bufs = { &buf_x, &buf_y, &buf_w };
144 
145  auto max_element = [](int_vector_buffer<> & buf) {
146  uint64_t max_val = 0;
147  for (auto val : buf) { max_val = std::max((uint64_t)val, max_val); }
148  return max_val;
149  };
150 
151  auto max_buf_element = [&]() {
152  uint64_t max_v = 0;
153  for (auto buf : bufs)
154  {
155  uint64_t _max_v = max_element(*buf);
156  max_v = std::max(max_v, _max_v);
157  }
158  return max_v;
159  };
160 
161  uint64_t x = max_buf_element();
162  uint8_t res = 0;
163  while (res <= 64 and precomp<t_k>::exp(res) <= x) { ++res; }
164  if (res == 65) { throw std::logic_error("Maximal element of input is too big."); }
165 
166  if (precomp<t_k>::exp(res) <= std::numeric_limits<uint32_t>::max())
167  {
168  auto v = read<uint32_t, uint32_t, uint32_t>(bufs);
169  construct(v, temp_dir);
170  }
171  else
172  {
173  auto v = read<uint64_t, uint64_t, uint64_t>(bufs);
174  construct(v, temp_dir);
175  }
176  }
177 
178  template <typename t_x = uint64_t, typename t_y = uint64_t, typename t_w = uint64_t>
179  std::vector<std::tuple<t_x, t_y, t_w>> read(std::vector<int_vector_buffer<> *> & bufs)
180  {
181  typedef std::vector<std::tuple<t_x, t_y, t_w>> t_tuple_vec;
182  t_tuple_vec v = t_tuple_vec(bufs[0]->size());
183  for (uint64_t j = 0; j < v.size(); ++j) { std::get<0>(v[j]) = (*(bufs[0]))[j]; }
184  for (uint64_t j = 0; j < v.size(); ++j) { std::get<1>(v[j]) = (*(bufs[1]))[j]; }
185  for (uint64_t j = 0; j < v.size(); ++j) { std::get<2>(v[j]) = (*(bufs[2]))[j]; }
186  return v;
187  }
188 
189  template <typename t_x, typename t_y, typename t_w>
190  k2_treap(std::vector<std::tuple<t_x, t_y, t_w>> & v, std::string temp_dir = ".")
191  {
192  if (v.size() > 0) { construct(v, temp_dir); }
193  }
194 
195  template <typename t_x, typename t_y, typename t_w>
196  void construct(std::vector<std::tuple<t_x, t_y, t_w>> & v, std::string temp_dir = ".")
197  {
198  using namespace k2_treap_ns;
199  using t_e = std::tuple<t_x, t_y, t_w>;
200  m_t = get_t(v);
201  uint64_t M = precomp<t_k>::exp(t);
202  t_e MM = t_e(M, M, M);
203 
204  std::string id_part = util::to_string(util::pid()) + "_" + util::to_string(util::id());
205 
206  m_coord.resize(t);
207  m_level_idx = int_vector<64>(1 + t, 0);
208 
209  std::string val_file = temp_dir + "/k2_treap_" + id_part + ".sdsl";
210  std::string bp_file = temp_dir + "/bp_" + id_part + ".sdsl";
211 
212  {
213  int_vector_buffer<> val_buf(val_file, std::ios::out);
214  int_vector_buffer<1> bp_buf(bp_file, std::ios::out);
215 
216  auto end = std::end(v);
217  uint64_t last_level_nodes = 1;
218  uint64_t level_nodes;
219  for (uint64_t l = t, cc = 0; l + 1 > 0; --l)
220  {
221  if (l > 0)
222  {
223  m_level_idx[l - 1] = m_level_idx[l] + last_level_nodes;
224  m_coord[l - 1] = int_vector<>(2 * last_level_nodes, 0, bits::hi(precomp<t_k>::exp(l)) + 1);
225  }
226  level_nodes = 0;
227  cc = 0;
228  auto sp = std::begin(v);
229  for (auto ep = sp; ep != end;)
230  {
231  ep = std::find_if(sp, end, [&sp, &l](const t_e & e) {
232  auto x1 = std::get<0>(*sp);
233  auto y1 = std::get<1>(*sp);
234  auto x2 = std::get<0>(e);
235  auto y2 = std::get<1>(e);
236  return precomp<t_k>::divexp(x1, l) != precomp<t_k>::divexp(x2, l) or
237  precomp<t_k>::divexp(y1, l) != precomp<t_k>::divexp(y2, l);
238  });
239  auto max_it = std::max_element(sp, ep, [](t_e a, t_e b) {
240  if (std::get<2>(a) != std::get<2>(b))
241  return std::get<2>(a) < std::get<2>(b);
242  else if (std::get<0>(a) != std::get<0>(b))
243  return std::get<0>(a) > std::get<0>(b);
244  return std::get<1>(a) > std::get<1>(b);
245  });
246  if (l > 0)
247  {
248  m_coord[l - 1][2 * cc] = precomp<t_k>::modexp(std::get<0>(*max_it), l);
249  m_coord[l - 1][2 * cc + 1] = precomp<t_k>::modexp(std::get<1>(*max_it), l);
250  ++cc;
251  }
252 
253  val_buf.push_back(std::get<2>(*max_it));
254  *max_it = MM;
255  --ep;
256  std::swap(*max_it, *ep);
257  if (l > 0)
258  {
259  auto _sp = sp;
260 
261  for (uint8_t i = 0; i < t_k; ++i)
262  {
263  auto _ep = ep;
264  if (i + 1 < t_k)
265  {
266  _ep = std::partition(_sp, ep, [&i, &l](const t_e & e) {
267  return precomp<t_k>::divexp(std::get<0>(e), l - 1) % t_k <= i;
268  });
269  }
270  auto __sp = _sp;
271  for (uint8_t j = 0; j < t_k; ++j)
272  {
273  auto __ep = _ep;
274  if (j + 1 < t_k)
275  {
276  __ep = std::partition(__sp, _ep, [&j, &l](const t_e & e) {
277  return precomp<t_k>::divexp(std::get<1>(e), l - 1) % t_k <= j;
278  });
279  }
280  bool not_empty = __ep > __sp;
281  bp_buf.push_back(not_empty);
282  level_nodes += not_empty;
283  __sp = __ep;
284  }
285  _sp = _ep;
286  }
287  }
288  ++ep;
289  sp = ep;
290  }
291  end = std::remove_if(begin(v), end, [&](t_e e) { return e == MM; });
292  last_level_nodes = level_nodes;
293  }
294  }
295  bit_vector bp;
296  load_from_file(bp, bp_file);
297  {
298  int_vector_buffer<> val_rw(val_file, std::ios::in | std::ios::out);
299  int_vector_buffer<> val_r(val_file, std::ios::in);
300  uint64_t bp_idx = bp.size();
301  uint64_t r_idx = m_level_idx[0];
302  uint64_t rw_idx = val_rw.size();
303  while (bp_idx > 0)
304  {
305  --r_idx;
306  for (size_t i = 0; i < t_k * t_k; ++i)
307  {
308  if (bp[--bp_idx])
309  {
310  --rw_idx;
311  val_rw[rw_idx] = val_r[r_idx] - val_rw[rw_idx];
312  }
313  }
314  }
315  }
316  {
317  int_vector_buffer<> val_r(val_file);
318  m_maxval = t_max_vec(val_r);
319  }
320  {
321  bit_vector _bp(std::move(bp));
322  m_bp = t_bv(_bp);
323  }
324  util::init_support(m_bp_rank, &m_bp);
325  sdsl::remove(bp_file);
326  sdsl::remove(val_file);
327  }
328 
330  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
331  {
332  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
333  size_type written_bytes = 0;
334  written_bytes += write_member(m_t, out, child, "t");
335  written_bytes += m_bp.serialize(out, child, "bp");
336  written_bytes += m_bp_rank.serialize(out, child, "bp_rank");
337  written_bytes += serialize_vector(m_coord, out, child, "coord");
338  written_bytes += m_maxval.serialize(out, child, "maxval");
339  written_bytes += m_level_idx.serialize(out, child, "level_idx");
340  structure_tree::add_size(child, written_bytes);
341  return written_bytes;
342  }
343 
345  void load(std::istream & in)
346  {
347  read_member(m_t, in);
348  m_bp.load(in);
349  m_bp_rank.load(in);
350  m_bp_rank.set_vector(&m_bp);
351  m_coord.resize(t);
352  load_vector(m_coord, in);
353  m_maxval.load(in);
354  m_level_idx.load(in);
355  }
356 
358  template <typename archive_t>
359  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
360  {
361  ar(CEREAL_NVP(m_t));
362  ar(CEREAL_NVP(m_bp));
363  ar(CEREAL_NVP(m_bp_rank));
364  ar(CEREAL_NVP(m_coord));
365  ar(CEREAL_NVP(m_maxval));
366  ar(CEREAL_NVP(m_level_idx));
367  }
368 
370  template <typename archive_t>
371  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
372  {
373  ar(CEREAL_NVP(m_t));
374  ar(CEREAL_NVP(m_bp));
375  ar(CEREAL_NVP(m_bp_rank));
376  m_bp_rank.set_vector(&m_bp);
377  ar(CEREAL_NVP(m_coord));
378  ar(CEREAL_NVP(m_maxval));
379  ar(CEREAL_NVP(m_level_idx));
380  }
381 
383  bool operator==(k2_treap const & other) const noexcept
384  {
385  return (m_t == other.m_t) && (m_bp == other.m_bp) && (m_bp_rank == other.m_bp_rank) &&
386  (m_maxval == other.m_maxval) && (m_coord == other.m_coord) && (m_level_idx == other.m_level_idx);
387  }
388 
390  bool operator!=(k2_treap const & other) const noexcept { return !(*this == other); }
391 
392  node_type root() const
393  {
394  return node_type(t, t_p(0, 0), 0, m_maxval[0], t_p(m_coord[t - 1][0], m_coord[t - 1][1]));
395  }
396 
397  bool is_leaf(const node_type & v) const { return v.idx >= m_bp.size(); }
398 
399  std::vector<node_type> children(const node_type & v) const
400  {
401  using namespace k2_treap_ns;
402  std::vector<node_type> res;
403  if (!is_leaf(v))
404  {
405  uint64_t rank = m_bp_rank(v.idx);
406  auto x = std::real(v.p);
407  auto y = std::imag(v.p);
408 
409  for (size_t i = 0; i < t_k; ++i)
410  {
411  for (size_t j = 0; j < t_k; ++j)
412  {
413  // get_int better for compressed bitvectors
414  // or introduce cache for bitvectors
415  if (m_bp[v.idx + t_k * i + j])
416  {
417  ++rank;
418 
419  auto _x = x + i * precomp<t_k>::exp(v.t - 1);
420  auto _y = y + j * precomp<t_k>::exp(v.t - 1);
421 
422  auto _max_v = v.max_v - m_maxval[rank];
423  auto _max_p = t_p(_x, _y);
424  if (v.t > 1)
425  {
426  auto y = rank - m_level_idx[v.t - 1];
427  _max_p = t_p(_x + m_coord[v.t - 2][2 * y], _y + m_coord[v.t - 2][2 * y + 1]);
428  }
429  res.emplace_back(v.t - 1, t_p(_x, _y), rank * t_k * t_k, _max_v, _max_p);
430  }
431  }
432  }
433  }
434  return res;
435  }
436 };
437 } // namespace sdsl
438 #endif
bits.hpp contains the sdsl::bits class.
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
uint64_t size() const
Returns the number of elements currently stored.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
A k^2-treap.
Definition: k2_treap.hpp:51
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: k2_treap.hpp:359
k2_treap()=default
k2_treap(std::vector< std::tuple< t_x, t_y, t_w >> &v, std::string temp_dir=".")
Definition: k2_treap.hpp:190
k2_treap(int_vector_buffer<> &buf_x, int_vector_buffer<> &buf_y, int_vector_buffer<> &buf_w, std::string temp_dir)
Definition: k2_treap.hpp:136
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: k2_treap.hpp:345
void construct(std::vector< std::tuple< t_x, t_y, t_w >> &v, std::string temp_dir=".")
Definition: k2_treap.hpp:196
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
Definition: k2_treap.hpp:371
k2_treap(k2_treap &&tr)
Definition: k2_treap.hpp:104
k2_treap_ns::t_p t_p
Definition: k2_treap.hpp:59
std::vector< node_type > children(const node_type &v) const
Definition: k2_treap.hpp:399
uint8_t & t
Definition: k2_treap.hpp:89
bool operator==(k2_treap const &other) const noexcept
Equality operator.
Definition: k2_treap.hpp:383
k2_treap & operator=(k2_treap &&tr)
Move assignment operator.
Definition: k2_treap.hpp:107
k2_treap & operator=(k2_treap &tr)
Assignment operator.
Definition: k2_treap.hpp:123
size_type size() const
Number of points in the 2^k treap.
Definition: k2_treap.hpp:134
k2_treap_ns::node_type node_type
Definition: k2_treap.hpp:57
std::vector< std::tuple< t_x, t_y, t_w > > read(std::vector< int_vector_buffer<> * > &bufs)
Definition: k2_treap.hpp:179
bool operator!=(k2_treap const &other) const noexcept
Inequality operator.
Definition: k2_treap.hpp:390
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: k2_treap.hpp:330
node_type root() const
Definition: k2_treap.hpp:392
k2_treap_ns::point_type point_type
Definition: k2_treap.hpp:58
int_vector ::size_type size_type
Definition: k2_treap.hpp:52
bool is_leaf(const node_type &v) const
Definition: k2_treap.hpp:397
k2_treap(const k2_treap &tr)
Definition: k2_treap.hpp:93
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
k2_treap_algorithm.hpp contains k^2-treap algorithms.
k2_treap_helper.hpp contains helper functions and definitions for a k^2-treap implementation.
std::complex< uint64_t > t_p
uint64_t id()
uint64_t pid()
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
void load_vector(std::vector< T > &, std::istream &)
Load all elements of a vector from a input stream.
Definition: io.hpp:404
uint64_t serialize_vector(const std::vector< T > &, std::ostream &, sdsl::structure_tree_node *v=nullptr, std::string="")
Serialize each element of an std::vector.
Definition: io.hpp:376
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:970
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661