SDSL  3.0.0
Succinct Data Structure Library
int_vector_buffer.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_INT_VECTOR_BUFFER
9 #define INCLUDED_INT_VECTOR_BUFFER
10 
11 #include <cassert>
12 #include <fstream>
13 #include <iostream>
14 #include <stdio.h>
15 #include <string>
16 
17 #include <sdsl/int_vector.hpp>
18 #include <sdsl/iterators.hpp>
19 
20 namespace sdsl
21 {
22 
23 template <uint8_t t_width = 0>
25 {
26  public:
27  class iterator;
30 
31  private:
32  static_assert(t_width <= 64, "int_vector_buffer: width must be at most 64 bits.");
33  sdsl::isfstream m_ifile;
34  sdsl::osfstream m_ofile;
35  std::string m_filename;
36  int_vector<t_width> m_buffer;
37  bool m_need_to_write = false;
38  // length of int_vector header in bytes: 0 for plain, 8 for int_vector<t_width> (0 < t_width), 9 for int_vector<0>
39  uint64_t m_offset = 0;
40  uint64_t m_buffersize = 8; // in elements! m_buffersize*width() must be a multiple of 8!
41  uint64_t m_size = 0; // size of int_vector_buffer
42  uint64_t m_begin = 0; // number in elements
43 
45  void read_block(const uint64_t idx)
46  {
47  m_begin = (idx / m_buffersize) * m_buffersize;
48  if (m_begin >= m_size) { util::set_to_value(m_buffer, 0); }
49  else
50  {
51  m_ifile.seekg(m_offset + (m_begin * width()) / 8);
52  assert(m_ifile.good());
53  m_ifile.read((char *)m_buffer.data(), (m_buffersize * width()) / 8);
54  if ((uint64_t)m_ifile.gcount() < (m_buffersize * width()) / 8) { m_ifile.clear(); }
55  assert(m_ifile.good());
56  for (uint64_t i = m_size - m_begin; i < m_buffersize; ++i) { m_buffer[i] = 0; }
57  }
58  }
59 
61  void write_block()
62  {
63  if (m_need_to_write)
64  {
65  m_ofile.seekp(m_offset + (m_begin * width()) / 8);
66  assert(m_ofile.good());
67  if (m_begin + m_buffersize >= m_size)
68  {
69  // last block in file
70  uint64_t wb = ((m_size - m_begin) * width() + 7) / 8;
71  m_ofile.write((char *)m_buffer.data(), wb);
72  }
73  else
74  {
75  m_ofile.write((char *)m_buffer.data(), (m_buffersize * width()) / 8);
76  }
77  m_ofile.flush();
78  assert(m_ofile.good());
79  m_need_to_write = false;
80  }
81  }
82 
84  uint64_t read(const uint64_t idx)
85  {
86  assert(is_open());
87  assert(idx < m_size);
88  if (idx < m_begin or m_begin + m_buffersize <= idx)
89  {
90  write_block();
91  read_block(idx);
92  }
93  return m_buffer[idx - m_begin];
94  }
95 
97  void write(const uint64_t idx, const uint64_t value)
98  {
99  assert(is_open());
100  // If idx is not in current block, write current block and load needed block
101  if (idx < m_begin or m_begin + m_buffersize <= idx)
102  {
103  write_block();
104  read_block(idx);
105  }
106  if (m_size <= idx) { m_size = idx + 1; }
107  m_need_to_write = true;
108  m_buffer[idx - m_begin] = value;
109  }
110 
111  public:
114 
116 
125  int_vector_buffer(const std::string filename,
126  std::ios::openmode mode = std::ios::in,
127  const uint64_t buffer_size = 1024 * 1024,
128  const uint8_t int_width = t_width,
129  const bool is_plain = false)
130  {
131  m_filename = filename;
132  assert(!(mode & std::ios::app));
133  mode &= ~std::ios::app;
134  m_buffer.width(int_width);
135  if (is_plain)
136  {
137  m_offset = 0;
138  // is_plain is only allowed with width() in {8, 16, 32, 64}
139  assert(8 == width() or 16 == width() or 32 == width() or 64 == width());
140  }
141  else
142  {
143  m_offset = 8; // TODO: make this dependent on header size of int_vector<t_width>
144  }
145 
146  // Open file for IO
147  m_ofile.open(m_filename, mode | std::ios::out | std::ios::binary);
148  assert(m_ofile.good());
149  m_ifile.open(m_filename, std::ios::in | std::ios::binary);
150  assert(m_ifile.good());
151  if (mode & std::ios::in)
152  {
153  uint64_t size = 0;
154  if (is_plain)
155  {
156  m_ifile.seekg(0, std::ios_base::end);
157  size = m_ifile.tellg() * 8;
158  }
159  else
160  {
161  uint8_t width = 0;
163  m_buffer.width(width);
164  }
165  assert(m_ifile.good());
166  m_size = size / width();
167  }
168  buffersize(buffer_size);
169  }
170 
173  : m_filename(std::move(ivb.m_filename))
174  , m_buffer(std::move(ivb.m_buffer))
175  , m_need_to_write(ivb.m_need_to_write)
176  , m_offset(ivb.m_offset)
177  , m_buffersize(ivb.m_buffersize)
178  , m_size(ivb.m_size)
179  , m_begin(ivb.m_begin)
180  {
181  ivb.m_ifile.close();
182  ivb.m_ofile.close();
183  m_ifile.open(m_filename, std::ios::in | std::ios::binary);
184  m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
185  assert(m_ifile.good());
186  assert(m_ofile.good());
187  // set ivb to default-constructor state
188  ivb.m_filename = "";
189  ivb.m_buffer = int_vector<t_width>();
190  ivb.m_need_to_write = false;
191  ivb.m_offset = 0;
192  ivb.m_buffersize = 8;
193  ivb.m_size = 0;
194  ivb.m_begin = 0;
195  }
196 
199 
202  {
203  close();
204  ivb.m_ifile.close();
205  ivb.m_ofile.close();
206  m_filename = ivb.m_filename;
207  m_ifile.open(m_filename, std::ios::in | std::ios::binary);
208  m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
209  assert(m_ifile.good());
210  assert(m_ofile.good());
211  // assign the values of ivb to this
212  m_buffer = (int_vector<t_width> &&) ivb.m_buffer;
213  m_need_to_write = ivb.m_need_to_write;
214  m_offset = ivb.m_offset;
215  m_buffersize = ivb.m_buffersize;
216  m_size = ivb.m_size;
217  m_begin = ivb.m_begin;
218  // set ivb to default-constructor state
219  ivb.m_filename = "";
220  ivb.m_buffer = int_vector<t_width>();
221  ivb.m_need_to_write = false;
222  ivb.m_offset = 0;
223  ivb.m_buffersize = 8;
224  ivb.m_size = 0;
225  ivb.m_begin = 0;
226  return *this;
227  }
228 
230  uint8_t width() const { return m_buffer.width(); }
231 
233  uint64_t size() const { return m_size; }
234 
236  std::string filename() const { return m_filename; }
237 
239  uint64_t buffersize() const
240  {
241  assert(m_buffersize * width() % 8 == 0);
242  return (m_buffersize * width()) / 8;
243  }
244 
246  void buffersize(uint64_t buffersize)
247  {
248  if (0ULL == buffersize) buffersize = 8;
249  write_block();
250  if (0 == (buffersize * 8) % width())
251  {
252  m_buffersize = buffersize * 8 /
253  width(); // m_buffersize might not be multiple of 8, but m_buffersize*width() is.
254  }
255  else
256  {
257  uint64_t element_buffersize = (buffersize * 8) / width() +
258  1; // one more element than fits into given buffersize in byte
259  m_buffersize = element_buffersize + 7 - (element_buffersize + 7) % 8; // take next multiple of 8
260  }
261  m_buffer = int_vector<t_width>(m_buffersize, 0, width());
262  if (0 != m_buffersize) read_block(0);
263  }
264 
266  bool good() { return m_ifile.good() and m_ofile.good(); }
267 
269  bool is_open()
270  {
271  return m_ifile.is_open() and m_ofile.is_open();
272  ;
273  }
274 
276  void reset()
277  {
278  // reset file
279  assert(m_ifile.good());
280  assert(m_ofile.good());
281  m_ifile.close();
282  m_ofile.close();
283  m_ofile.open(m_filename, std::ios::out | std::ios::binary);
284  assert(m_ofile.good());
285  m_ifile.open(m_filename, std::ios::in | std::ios::binary);
286  assert(m_ifile.good());
287  assert(m_ofile.good());
288  // reset member variables
289  m_need_to_write = false;
290  m_size = 0;
291  // reset buffer
292  read_block(0);
293  }
294 
295  // Forward declaration
296  class reference;
297 
299 
302  reference operator[](uint64_t idx) { return reference(this, idx); }
303 
305  void push_back(const uint64_t value) { write(m_size, value); }
306 
308 
311  void close(bool remove_file = false)
312  {
313  if (is_open())
314  {
315  if (!remove_file)
316  {
317  write_block();
318  if (0 < m_offset)
319  { // in case of int_vector, write header and trailing zeros
320  uint64_t size = m_size * width();
321  m_ofile.seekp(0, std::ios::beg);
323  assert(m_ofile.good());
324  uint64_t wb = (size + 7) / 8;
325  if (wb % 8)
326  {
327  m_ofile.seekp(m_offset + wb);
328  assert(m_ofile.good());
329  m_ofile.write("\0\0\0\0\0\0\0\0", 8 - wb % 8);
330  assert(m_ofile.good());
331  }
332  }
333  }
334  m_ifile.close();
335  assert(m_ifile.good());
336  m_ofile.close();
337  assert(m_ofile.good());
338  if (remove_file) { sdsl::remove(m_filename); }
339  }
340  }
341 
342  iterator begin() { return iterator(*this, 0); }
343 
344  iterator end() { return iterator(*this, size()); }
345 
346  class reference
347  {
348  friend class int_vector_buffer<t_width>;
349 
350  private:
351  int_vector_buffer<t_width> * const m_int_vector_buffer = nullptr;
352  uint64_t m_idx = 0;
353 
354  reference() {}
355 
356  reference(int_vector_buffer<t_width> * _int_vector_buffer, uint64_t _idx)
357  : m_int_vector_buffer(_int_vector_buffer)
358  , m_idx(_idx)
359  {}
360 
361  public:
363  operator uint64_t() const { return m_int_vector_buffer->read(m_idx); }
364 
366  reference & operator=(const uint64_t & val)
367  {
368  m_int_vector_buffer->write(m_idx, val);
369  return *this;
370  }
371 
373  reference & operator=(reference & x) { return *this = (uint64_t)(x); };
374 
377  {
378  uint64_t x = m_int_vector_buffer->read(m_idx);
379  m_int_vector_buffer->write(m_idx, x + 1);
380  return *this;
381  }
382 
384  uint64_t operator++(int)
385  {
386  uint64_t val = (uint64_t) * this;
387  ++(*this);
388  return val;
389  }
390 
393  {
394  uint64_t x = m_int_vector_buffer->read(m_idx);
395  m_int_vector_buffer->write(m_idx, x - 1);
396  return *this;
397  }
398 
400  uint64_t operator--(int)
401  {
402  uint64_t val = (uint64_t) * this;
403  --(*this);
404  return val;
405  }
406 
408  reference & operator+=(const uint64_t x)
409  {
410  uint64_t w = m_int_vector_buffer->read(m_idx);
411  m_int_vector_buffer->write(m_idx, w + x);
412  return *this;
413  }
414 
416  reference & operator-=(const uint64_t x)
417  {
418  uint64_t w = m_int_vector_buffer->read(m_idx);
419  m_int_vector_buffer->write(m_idx, w - x);
420  return *this;
421  }
422 
423  bool operator==(const reference & x) const { return (uint64_t) * this == (uint64_t)x; }
424 
425  bool operator<(const reference & x) const { return (uint64_t) * this < (uint64_t)x; }
426  };
427 
428  class iterator
429  : public std::iterator<std::random_access_iterator_tag, value_type, difference_type, value_type *, reference>
430  {
431  private:
433  uint64_t m_idx = 0;
434 
435  public:
436  iterator() = delete;
437  iterator(int_vector_buffer<t_width> & ivb, uint64_t idx = 0)
438  : m_ivb(ivb)
439  , m_idx(idx)
440  {}
441 
443  {
444  ++m_idx;
445  return *this;
446  }
447 
449  {
450  iterator it = *this;
451  ++(*this);
452  return it;
453  }
454 
456  {
457  --m_idx;
458  return *this;
459  }
460 
462  {
463  iterator it = *this;
464  --(*this);
465  return it;
466  }
467 
468  reference operator*() const { return m_ivb[m_idx]; }
469 
471  {
472  if (i < 0) return *this -= (-i);
473  m_idx += i;
474  return *this;
475  }
476 
478  {
479  if (i < 0) return *this += (-i);
480  m_idx -= i;
481  return *this;
482  }
483 
485  {
486  iterator it = *this;
487  return it += i;
488  }
489 
491  {
492  iterator it = *this;
493  return it -= i;
494  }
495 
496  bool operator==(const iterator & it) const { return &m_ivb == &(it.m_ivb) and m_idx == it.m_idx; }
497 
498  bool operator!=(const iterator & it) const { return !(*this == it); }
499  inline difference_type operator-(const iterator & it) { return (m_idx - it.m_idx); }
500  };
501 };
502 
503 } // namespace sdsl
504 
505 #endif // include guard
iterator operator-(difference_type i) const
bool operator!=(const iterator &it) const
iterator & operator+=(difference_type i)
iterator(int_vector_buffer< t_width > &ivb, uint64_t idx=0)
difference_type operator-(const iterator &it)
iterator operator+(difference_type i) const
iterator & operator-=(difference_type i)
bool operator==(const iterator &it) const
uint64_t operator++(int)
Postfix increment of the proxy object.
bool operator==(const reference &x) const
reference & operator+=(const uint64_t x)
Add assign from the proxy object.
reference & operator-=(const uint64_t x)
Subtract assign from the proxy object.
reference & operator++()
Prefix increment of the proxy object.
uint64_t operator--(int)
Postfix decrement of the proxy object.
reference & operator=(reference &x)
Assignment operator.
reference & operator=(const uint64_t &val)
Assignment operator for write operations.
reference & operator--()
Prefix decrement of the proxy object.
bool operator<(const reference &x) const
uint64_t size() const
Returns the number of elements currently stored.
void reset()
Delete all content and set size to 0.
void close(bool remove_file=false)
Close the int_vector_buffer.
reference operator[](uint64_t idx)
[] operator
int_vector_buffer(const std::string filename, std::ios::openmode mode=std::ios::in, const uint64_t buffer_size=1024 *1024, const uint8_t int_width=t_width, const bool is_plain=false)
Constructor for int_vector_buffer.
int_vector< t_width >::difference_type difference_type
int_vector< t_width >::value_type value_type
bool good()
Returns whether state of underlying streams are good.
void buffersize(uint64_t buffersize)
Set the buffersize in bytes.
std::string filename() const
Returns the filename.
int_vector_buffer< t_width > & operator=(int_vector_buffer &&ivb)
Move assignment operator.
uint64_t buffersize() const
Returns the buffersize in bytes.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
uint8_t width() const
Returns the width of the integers which are accessed via the [] operator.
int_vector_buffer(int_vector_buffer &&ivb)
Move constructor.
bool is_open()
Returns whether underlying streams are currently associated to a file.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
ptrdiff_t difference_type
Definition: int_vector.hpp:265
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:619
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:255
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
Definition: int_vector.hpp:813
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:830
bool is_open()
Is the stream close?
Definition: sfstream.hpp:222
std::streampos tellg()
Definition: sfstream.hpp:303
buf_ptr_type open(const std::string &file, std::ios_base::openmode mode=std::ios_base::in)
Open the stream.
Definition: sfstream.hpp:194
void close()
Close the stream.
Definition: sfstream.hpp:233
isfstream & seekg(pos_type pos)
Definition: sfstream.hpp:257
void close()
Close the stream.
Definition: sfstream.hpp:87
bool is_open()
Is the stream close?
Definition: sfstream.hpp:76
buf_ptr_type open(const std::string &file, std::ios_base::openmode mode=std::ios_base::out)
Open the stream.
Definition: sfstream.hpp:48
osfstream & seekp(pos_type pos)
Definition: sfstream.hpp:111
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:566
Namespace for the succinct data structure library.
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194