SDSL  3.0.0
Succinct Data Structure Library
nearest_neighbour_dictionary.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
9 #define INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
10 
11 #include <stdexcept>
12 #include <string>
13 
14 #include <sdsl/int_vector.hpp>
15 #include <sdsl/rank_support.hpp>
16 #include <sdsl/util.hpp>
17 #include <sdsl/wm_int.hpp>
18 
20 namespace sdsl
21 {
22 
25 
36 // TODO: implement an iterator for the ones in the nearest neighbour dictionary!!! used in the construction of the
37 // balanced parentheses support
38 template <uint8_t t_sample_dens>
40 {
41  private:
42  static_assert(t_sample_dens != 0, "nearest_neighbour_dictionary: t_sample_dens should not be equal 0!");
43 
44  public:
46 
47  private:
48  int_vector<> m_abs_samples; // absolute samples array corresponds to array \f$ A_1 \f$ in the paper
49  int_vector<> m_differences; // vector for the differences in between the samples; corresponds to array \f$ A_2 \f$
50  // in the paper
51  size_type m_ones; // corresponds to N in the paper
52  size_type m_size; // corresponds to M in the paper
53  bit_vector m_contains_abs_sample; // vector which stores for every block of length t_sample_dens of the original
54  // bit_vector if an absolute sample lies in this block.
55  // Corresponds to array \f$ A_3 \f$ in the paper.
56  rank_support_v<> m_rank_contains_abs_sample; // rank support for m_contains_abs_sample. Corresponds to array \f$ A_4
57  // \f$ in the paper.
58  // NOTE: A faster version should store the absolute samples and the differences interleaved
59 
60  public:
63  : m_ones(0)
64  , m_size(0)
65  {}
66 
68 
71  : m_ones(0)
72  , m_size(0)
73  {
74  size_type max_distance_between_two_ones = 0;
75  size_type ones = 0; // counter for the ones in v
76 
77  // get maximal distance between to ones in the bit vector
78  // speed this up by broadword computing
79  for (size_type i = 0, last_one_pos_plus_1 = 0; i < v.size(); ++i)
80  {
81  if (v[i])
82  {
83  if (i + 1 - last_one_pos_plus_1 > max_distance_between_two_ones)
84  max_distance_between_two_ones = i + 1 - last_one_pos_plus_1;
85  last_one_pos_plus_1 = i + 1;
86  ++ones;
87  }
88  }
89  m_ones = ones;
90  m_size = v.size();
91  // std::cerr<<ones<<std::endl;
92  // initialize absolute samples m_abs_samples[0]=0
93  m_abs_samples = int_vector<>(m_ones / t_sample_dens + 1, 0, bits::hi(v.size()) + 1);
94  // initialize different values
95  m_differences = int_vector<>(m_ones - m_ones / t_sample_dens, 0, bits::hi(max_distance_between_two_ones) + 1);
96  // initialize m_contains_abs_sample
97  m_contains_abs_sample = bit_vector((v.size() + t_sample_dens - 1) / t_sample_dens, 0);
98  ones = 0;
99  for (size_type i = 0, last_one_pos = 0; i < v.size(); ++i)
100  {
101  if (v[i])
102  {
103  ++ones;
104  if ((ones % t_sample_dens) == 0)
105  { // insert absolute samples
106  m_abs_samples[ones / t_sample_dens] = i;
107  m_contains_abs_sample[i / t_sample_dens] = 1;
108  }
109  else
110  {
111  m_differences[ones - ones / t_sample_dens - 1] = i - last_one_pos;
112  }
113  last_one_pos = i;
114  }
115  }
116  util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample);
117  }
118 
121  : m_abs_samples(nnd.m_abs_samples)
122  , m_differences(nnd.m_differences)
123  , m_ones(nnd.m_ones)
124  , m_size(nnd.m_size)
125  , m_contains_abs_sample(nnd.m_contains_abs_sample)
126  , m_rank_contains_abs_sample(nnd.m_rank_contains_abs_sample)
127  {
128  m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
129  }
130 
132  nearest_neighbour_dictionary(nearest_neighbour_dictionary && nnd) { *this = std::move(nnd); }
133 
136 
138  {
139  if (*this != &nnd)
140  {
142  *this = std::move(tmp);
143  }
144  return *this;
145  }
146 
148  {
149  if (this != &nnd)
150  {
151  m_abs_samples = std::move(nnd.m_abs_samples);
152  m_differences = std::move(nnd.m_differences);
153  m_ones = std::move(nnd.m_ones);
154  m_size = std::move(nnd.m_size);
155  m_contains_abs_sample = std::move(nnd.m_contains_abs_sample);
156  m_rank_contains_abs_sample = std::move(nnd.m_rank_contains_abs_sample);
157  m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
158  }
159  return *this;
160  }
161 
163 
168  {
169  assert(idx <= m_size);
170  size_type r = m_rank_contains_abs_sample.rank(idx / t_sample_dens); //
171  size_type result = r * t_sample_dens;
172  size_type i = m_abs_samples[r];
173  while (++result <= m_ones)
174  {
175  if ((result % t_sample_dens) == 0) { i = m_abs_samples[result / t_sample_dens]; }
176  else
177  {
178  i = i + m_differences[result - result / t_sample_dens - 1];
179  }
180  if (i >= idx) return result - 1;
181  }
182  return result - 1;
183  };
184 
186 
191  {
192  assert(i > 0 and i <= m_ones);
193  size_type j = i / t_sample_dens;
194  size_type result = m_abs_samples[j];
195  j = j * t_sample_dens - j;
196  for (size_type end = j + (i % t_sample_dens); j < end; ++j) { result += m_differences[j]; }
197  return result;
198  }
199 
201 
207  {
208  size_type r = rank(i + 1);
209  assert(r > 0);
210  return select(r);
211  }
219  {
220  size_type r = rank(i);
221  assert(r < m_ones);
222  return select(r + 1);
223  }
224 
225  size_type size() const { return m_size; }
226 
227  size_type ones() const { return m_ones; }
228 
230 
232  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
233  {
234  size_type written_bytes = 0;
235  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
236  written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
237  written_bytes += m_differences.serialize(out, child, "differences");
238  written_bytes += write_member(m_ones, out, child, "ones");
239  written_bytes += write_member(m_size, out, child, "size");
240  written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
241  written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
242  structure_tree::add_size(child, written_bytes);
243  return written_bytes;
244  }
245 
247 
249  void load(std::istream & in)
250  {
251  m_abs_samples.load(in);
252  m_differences.load(in);
253  read_member(m_ones, in);
254  read_member(m_size, in);
255  m_contains_abs_sample.load(in);
256  m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
257  }
258 
259  template <typename archive_t>
260  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
261  {
262  ar(CEREAL_NVP(m_ones));
263  ar(CEREAL_NVP(m_size));
264  ar(CEREAL_NVP(m_abs_samples));
265  ar(CEREAL_NVP(m_differences));
266  ar(CEREAL_NVP(m_contains_abs_sample));
267  ar(CEREAL_NVP(m_rank_contains_abs_sample));
268  }
269 
270  template <typename archive_t>
271  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
272  {
273  ar(CEREAL_NVP(m_ones));
274  ar(CEREAL_NVP(m_size));
275  ar(CEREAL_NVP(m_abs_samples));
276  ar(CEREAL_NVP(m_differences));
277  ar(CEREAL_NVP(m_contains_abs_sample));
278  ar(CEREAL_NVP(m_rank_contains_abs_sample));
279  m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
280  }
281 
283  bool operator==(nearest_neighbour_dictionary const & other) const noexcept
284  {
285  return (m_ones == other.m_ones) && (m_size == other.m_size) && (m_abs_samples == other.m_abs_samples) &&
286  (m_differences == other.m_differences) && (m_contains_abs_sample == other.m_contains_abs_sample) &&
287  (m_rank_contains_abs_sample == other.m_rank_contains_abs_sample);
288  }
289 
291  bool operator!=(nearest_neighbour_dictionary const & other) const noexcept { return !(*this == other); }
292 };
293 
294 } // end namespace sdsl
295 
296 #endif // end file
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
int_vector_size_type size_type
Definition: int_vector.hpp:266
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Nearest neighbour dictionary for sparse uniform sets (described in Geary et al., A Simple Optimal Rep...
size_type next(size_type i) const
Answers "next occurence of one" queries for the supported bit_vector.
bool operator==(nearest_neighbour_dictionary const &other) const noexcept
Equality operator.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the nearest_neighbour_dictionary.
void load(std::istream &in)
Loads the nearest_neighbour_dictionary.
nearest_neighbour_dictionary(const bit_vector &v)
Constructor.
nearest_neighbour_dictionary & operator=(const nearest_neighbour_dictionary &nnd)
nearest_neighbour_dictionary(nearest_neighbour_dictionary &&nnd)
Move constructor.
nearest_neighbour_dictionary(const nearest_neighbour_dictionary &nnd)
Copy constructor.
size_type select(size_type i) const
Answers select queries for the supported bit_vector.
nearest_neighbour_dictionary & operator=(nearest_neighbour_dictionary &&nnd)
size_type prev(size_type i) const
Answers "previous occurence of one" queries for the supported bit_vector.
bool operator!=(nearest_neighbour_dictionary const &other) const noexcept
Inequality operator.
A rank structure proposed by Sebastiano Vigna.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes rank_support.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
void set_vector(const bit_vector *v=nullptr)
Sets the supported bit_vector to the given pointer.
void load(std::istream &in, const int_vector< 1 > *v=nullptr)
Loads the rank_support.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wm_int.hpp contains a specialized class for a wavelet tree for sequences over large alphabets.