SDSL  3.0.0
Succinct Data Structure Library
enc_vector.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef SDSL_ENC_VECTOR
9 #define SDSL_ENC_VECTOR
10 
11 #include <sdsl/coder.hpp>
12 #include <sdsl/int_vector.hpp>
13 #include <sdsl/iterators.hpp>
14 
16 namespace sdsl
17 {
18 
19 template <uint8_t t_width>
21 {
23 };
24 
25 template <>
26 struct enc_vector_trait<32>
27 {
29 };
30 
31 template <>
32 struct enc_vector_trait<64>
33 {
35 };
36 
38 
48 template <class t_coder = coder::elias_delta<>, uint32_t t_dens = 128, uint8_t t_width = 0>
50 {
51  private:
52  static_assert(t_dens > 1, "enc_vector: sample density must be larger than `1`");
53 
54  public:
55  typedef uint64_t value_type;
58  typedef const value_type reference;
59  typedef const value_type const_reference;
60  typedef const value_type * const_pointer;
61  typedef ptrdiff_t difference_type;
63  typedef t_coder coder;
66  static const uint32_t sample_dens = t_dens;
68 
69  int_vector<0> m_z; // storage for encoded deltas
70  private:
71  int_vector_type m_sample_vals_and_pointer; // samples and pointers
72  size_type m_size = 0; // number of vector elements
73 
74  void clear()
75  {
76  m_z.resize(0);
78  m_size = 0;
79  m_sample_vals_and_pointer.resize(0);
80  m_sample_vals_and_pointer.shrink_to_fit();
81  }
82 
83  public:
84  enc_vector() = default;
85  enc_vector(const enc_vector &) = default;
86  enc_vector(enc_vector &&) = default;
87  enc_vector & operator=(const enc_vector &) = default;
88  enc_vector & operator=(enc_vector &&) = default;
89 
91 
93  template <class Container>
94  enc_vector(const Container & c);
95 
97  /*
98  * \param v_buf A int_vector_buf.
99  */
100  template <uint8_t int_width>
102 
105 
107  size_type size() const { return m_size; }
108 
110  static size_type max_size() { return int_vector<>::max_size() / 2; }
111 
113  bool empty() const { return 0 == m_size; }
114 
116  const const_iterator begin() const { return const_iterator(this, 0); }
117 
119  const const_iterator end() const { return const_iterator(this, this->m_size); }
120 
121  bool operator==(const enc_vector & v) const
122  {
123  return m_size && v.m_size && m_z == v.m_z && m_sample_vals_and_pointer == v.m_sample_vals_and_pointer;
124  }
125 
126  bool operator!=(const enc_vector & v) const { return !(*this == v); }
127 
129 
132 
134 
137  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
138 
140  void load(std::istream & in);
141 
142  template <typename archive_t>
143  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
144 
145  template <typename archive_t>
146  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
147 
149 
152  value_type sample(const size_type i) const;
153 
154  uint32_t get_sample_dens() const { return t_dens; }
155 
160  void get_inter_sampled_values(const size_type i, uint64_t * it) const
161  {
162  *(it++) = 0;
163  if (i * t_dens + t_dens - 1 < size())
164  {
165  t_coder::template decode<true, true>(m_z.data(), m_sample_vals_and_pointer[(i << 1) + 1], t_dens - 1, it);
166  }
167  else
168  {
169  assert(i * t_dens < size());
170  t_coder::template decode<true, true>(m_z.data(),
171  m_sample_vals_and_pointer[(i << 1) + 1],
172  size() - i * t_dens - 1,
173  it);
174  }
175  };
176 };
177 
178 template <class t_coder, uint32_t t_dens, uint8_t t_width>
180  const size_type i) const
181 {
182  assert(i + 1 != 0);
183  assert(i < m_size);
184  size_type idx = i / get_sample_dens();
185  return m_sample_vals_and_pointer[idx << 1] +
186  t_coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx << 1) + 1], i - t_dens * idx);
187 }
188 
189 template <class t_coder, uint32_t t_dens, uint8_t t_width>
191  const size_type i) const
192 {
193  assert(i * get_sample_dens() + 1 != 0);
194  assert(i * get_sample_dens() < m_size);
195  return m_sample_vals_and_pointer[i << 1];
196 }
197 
198 template <class t_coder, uint32_t t_dens, uint8_t t_width>
199 template <class Container>
201 {
202  // clear bit_vectors
203  clear();
204 
205  if (c.empty()) // if c is empty there is nothing to do...
206  return;
207  typename Container::const_iterator it = c.begin(), end = c.end();
208  typename Container::value_type v1 = *it, v2, max_sample_value = 0, x;
209  size_type samples = 0;
210  size_type z_size = 0;
211  // (1) Calculate maximal value of samples and of deltas
212  for (size_type i = 0, no_sample = 0; it != end; ++it, ++i, --no_sample)
213  {
214  v2 = *it;
215  if (!no_sample)
216  { // add a sample
217  no_sample = get_sample_dens();
218  if (max_sample_value < v2) max_sample_value = v2;
219  ++samples;
220  }
221  else
222  {
223  z_size += t_coder::encoding_length(v2 - v1);
224  }
225  v1 = v2;
226  }
227  // (2) Write sample values and deltas
228  {
229  if (max_sample_value > z_size + 1)
230  m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
231  else
232  m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
233  m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
234  util::set_to_value(m_sample_vals_and_pointer, 0);
235  typename int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin();
236  z_size = 0;
237  size_type no_sample = 0;
238  for (it = c.begin(); it != end; ++it, --no_sample)
239  {
240  v2 = *it;
241  if (!no_sample)
242  { // add a sample
243  no_sample = get_sample_dens();
244  *sv_it = v2;
245  ++sv_it;
246  *sv_it = z_size;
247  ++sv_it;
248  }
249  else
250  {
251  x = v2 - v1;
252  z_size += t_coder::encoding_length(x);
253  }
254  v1 = v2;
255  }
256  *sv_it = 0;
257  ++sv_it; // initialize
258  *sv_it = z_size + 1;
259  ++sv_it; // last entry
260 
261  m_z = int_vector<>(z_size, 0, 1);
262  uint64_t * z_data = t_coder::raw_data(m_z);
263  uint8_t offset = 0;
264  no_sample = 0;
265  for (it = c.begin(); it != end; ++it, --no_sample)
266  {
267  v2 = *it;
268  if (!no_sample)
269  { // add a sample
270  no_sample = get_sample_dens();
271  }
272  else
273  {
274  t_coder::encode(v2 - v1, z_data, offset);
275  }
276  v1 = v2;
277  }
278  }
279  m_size = c.size();
280 }
281 
282 template <class t_coder, uint32_t t_dens, uint8_t t_width>
283 template <uint8_t int_width>
285 {
286  // clear bit_vectors
287  clear();
288  size_type n = v_buf.size();
289  if (n == 0) // if c is empty there is nothing to do...
290  return;
291  value_type v1 = 0, v2 = 0, max_sample_value = 0;
292  size_type samples = 0, z_size = 0;
293  const size_type sd = get_sample_dens();
294  // (1) Calculate maximal value of samples and of deltas
295  for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample)
296  {
297  v2 = v_buf[i];
298  if (!no_sample)
299  { // is sample
300  no_sample = sd;
301  if (max_sample_value < v2) max_sample_value = v2;
302  ++samples;
303  }
304  else
305  {
306  z_size += t_coder::encoding_length(v2 - v1);
307  }
308  v1 = v2;
309  }
310 
311  // (2) Write sample values and deltas
312  // (a) Initialize array for sample values and pointers
313  if (max_sample_value > z_size + 1)
314  m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
315  else
316  m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
317  m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
318  util::set_to_value(m_sample_vals_and_pointer, 0);
319 
320  // (b) Initilize bit_vector for encoded data
321  m_z = int_vector<>(z_size, 0, 1);
322  uint64_t * z_data = t_coder::raw_data(m_z);
323  uint8_t offset = 0;
324 
325  // (c) Write sample values and deltas
326  z_size = 0;
327  for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample)
328  {
329  v2 = v_buf[i];
330  if (!no_sample)
331  { // is sample
332  no_sample = sd;
333  m_sample_vals_and_pointer[j++] = v2; // write samples
334  m_sample_vals_and_pointer[j++] = z_size; // write pointers
335  }
336  else
337  {
338  z_size += t_coder::encoding_length(v2 - v1);
339  t_coder::encode(v2 - v1, z_data, offset); // write encoded values
340  }
341  v1 = v2;
342  }
343  m_size = n;
344 }
345 
346 template <class t_coder, uint32_t t_dens, uint8_t t_width>
349  std::string name) const
350 {
351  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
352  size_type written_bytes = 0;
353  written_bytes += write_member(m_size, out, child, "size");
354  written_bytes += m_z.serialize(out, child, "encoded deltas");
355  written_bytes += m_sample_vals_and_pointer.serialize(out, child, "samples_and_pointers");
356  structure_tree::add_size(child, written_bytes);
357  return written_bytes;
358 }
359 
360 template <class t_coder, uint32_t t_dens, uint8_t t_width>
362 {
363  read_member(m_size, in);
364  m_z.load(in);
365  m_sample_vals_and_pointer.load(in);
366 }
367 
368 template <class t_coder, uint32_t t_dens, uint8_t t_width>
369 template <typename archive_t>
371 {
372  ar(CEREAL_NVP(m_size));
373  ar(CEREAL_NVP(m_z));
374  ar(CEREAL_NVP(m_sample_vals_and_pointer));
375 }
376 
377 template <class t_coder, uint32_t t_dens, uint8_t t_width>
378 template <typename archive_t>
380 {
381  ar(CEREAL_NVP(m_size));
382  ar(CEREAL_NVP(m_z));
383  ar(CEREAL_NVP(m_sample_vals_and_pointer));
384 }
385 
386 } // end namespace sdsl
387 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A generic immutable space-saving vector class for unsigned integers.
Definition: enc_vector.hpp:50
enc_vector enc_vec_type
Definition: enc_vector.hpp:67
const value_type const_reference
Definition: enc_vector.hpp:59
uint32_t get_sample_dens() const
Definition: enc_vector.hpp:154
const value_type * const_pointer
Definition: enc_vector.hpp:60
enc_vector()=default
random_access_const_iterator< enc_vector > iterator
Definition: enc_vector.hpp:56
enc_vector_trait< t_width >::int_vector_type int_vector_type
Definition: enc_vector.hpp:64
const const_iterator begin() const
Iterator that points to the first element of the enc_vector.
Definition: enc_vector.hpp:116
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize the enc_vector to a stream.
Definition: enc_vector.hpp:347
void load(std::istream &in)
Load the enc_vector from a stream.
Definition: enc_vector.hpp:361
static size_type max_size()
Return the largest size that this container can ever have.
Definition: enc_vector.hpp:110
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: enc_vector.hpp:370
bool operator==(const enc_vector &v) const
Definition: enc_vector.hpp:121
value_type operator[](size_type i) const
operator[]
Definition: enc_vector.hpp:179
const value_type reference
Definition: enc_vector.hpp:58
uint64_t value_type
Definition: enc_vector.hpp:52
bool operator!=(const enc_vector &v) const
Definition: enc_vector.hpp:126
value_type sample(const size_type i) const
Returns the i-th sample of enc_vector.
Definition: enc_vector.hpp:190
enc_vector(const enc_vector &)=default
int_vector ::size_type size_type
Definition: enc_vector.hpp:62
void get_inter_sampled_values(const size_type i, uint64_t *it) const
Definition: enc_vector.hpp:160
const const_iterator end() const
Iterator that points to the position after the last element of the enc_vector.
Definition: enc_vector.hpp:119
~enc_vector()
Default Destructor.
Definition: enc_vector.hpp:104
static const uint32_t sample_dens
Definition: enc_vector.hpp:66
iv_tag index_category
Definition: enc_vector.hpp:65
size_type size() const
The number of elements in the enc_vector.
Definition: enc_vector.hpp:107
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: enc_vector.hpp:379
ptrdiff_t difference_type
Definition: enc_vector.hpp:61
int_vector< 0 > m_z
Definition: enc_vector.hpp:69
enc_vector & operator=(enc_vector &&)=default
iterator const_iterator
Definition: enc_vector.hpp:57
enc_vector(enc_vector &&)=default
enc_vector & operator=(const enc_vector &)=default
bool empty() const
Returns if the enc_vector is empty.
Definition: enc_vector.hpp:113
uint64_t size() const
Returns the number of elements currently stored.
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:530
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:566
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
coder.hpp contains the coder namespace and includes the header files of sdsl::coder::fibonacci,...
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:566
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
int_vector< 32 > int_vector_type
Definition: enc_vector.hpp:28
int_vector< 64 > int_vector_type
Definition: enc_vector.hpp:34
int_vector< 0 > int_vector_type
Definition: enc_vector.hpp:22