SDSL  3.0.0
Succinct Data Structure Library
lcp_byte.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_LCP_BYTE
9 #define INCLUDED_SDSL_LCP_BYTE
10 
11 #include <algorithm> // for lower_bound
12 #include <cassert>
13 #include <iomanip>
14 #include <iostream>
15 #include <iterator>
16 #include <utility> // for pair
17 #include <vector>
18 
19 #include <sdsl/int_vector.hpp>
20 #include <sdsl/iterators.hpp>
21 #include <sdsl/lcp.hpp>
22 
23 namespace sdsl
24 {
25 
27 
38 template <uint8_t t_width = 0>
39 class lcp_byte
40 {
41  public:
45  typedef const value_type const_reference;
48  typedef const pointer const_pointer;
50  typedef ptrdiff_t difference_type;
51 
53  typedef lcp_tag index_tag;
54 
55  enum
56  {
59  sa_order = 1
60  }; // as the lcp_byte is not fast for texts with long repetition
61 
62  template <class Cst>
63  using type = lcp_byte;
64 
65  private:
66  int_vector<8> m_small_lcp; // vector for LCP values < 255
67  int_vector<t_width> m_big_lcp; // vector for LCP values > 254
68  int_vector<t_width> m_big_lcp_idx; // index of LCP entries in the LCP array
69 
70  typedef std::pair<size_type, size_type> tPII;
71  typedef std::vector<tPII> tVPII;
72 
73  public:
75  lcp_byte() = default;
76  lcp_byte(const lcp_byte &) = default;
77  lcp_byte(lcp_byte &&) = default;
78  lcp_byte & operator=(const lcp_byte &) = default;
79  lcp_byte & operator=(lcp_byte &&) = default;
80 
83  {
84  std::string lcp_file = cache_file_name(conf::KEY_LCP, config);
85  int_vector_buffer<> lcp_buf(lcp_file);
86  m_small_lcp = int_vector<8>(lcp_buf.size());
87  size_type l = 0, max_l = 0, max_big_idx = 0, big_sum = 0;
88 
89  for (size_type i = 0; i < m_small_lcp.size(); ++i)
90  {
91  if ((l = lcp_buf[i]) < 255) { m_small_lcp[i] = l; }
92  else
93  {
94  m_small_lcp[i] = 255;
95  if (l > max_l) max_l = l;
96  max_big_idx = i;
97  ++big_sum;
98  }
99  }
100  m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
101  m_big_lcp_idx = int_vector<>(big_sum, 0, bits::hi(max_big_idx) + 1);
102 
103  for (size_type i = 0, ii = 0; i < m_small_lcp.size(); ++i)
104  {
105  if ((l = lcp_buf[i]) >= 255)
106  {
107  m_big_lcp[ii] = l;
108  m_big_lcp_idx[ii] = i;
109  ++ii;
110  }
111  }
112  }
113 
115  size_type size() const { return m_small_lcp.size(); }
116 
119 
121  bool empty() const { return m_small_lcp.empty(); }
122 
124  const_iterator begin() const { return const_iterator(this, 0); }
125 
127  const_iterator end() const { return const_iterator(this, size()); }
128 
130 
134  {
135  if (m_small_lcp[i] != 255) { return m_small_lcp[i]; }
136  else
137  {
138  size_type idx = lower_bound(m_big_lcp_idx.begin(), m_big_lcp_idx.end(), i) - m_big_lcp_idx.begin();
139  return m_big_lcp[idx];
140  }
141  }
142 
144  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
145  {
146  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
147  size_type written_bytes = 0;
148  written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
149  written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
150  written_bytes += m_big_lcp_idx.serialize(out, child, "large_lcp_idx");
151  structure_tree::add_size(child, written_bytes);
152  return written_bytes;
153  }
154 
156  void load(std::istream & in)
157  {
158  m_small_lcp.load(in);
159  m_big_lcp.load(in);
160  m_big_lcp_idx.load(in);
161  }
162 
163  template <typename archive_t>
164  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
165  {
166  ar(CEREAL_NVP(m_small_lcp));
167  ar(CEREAL_NVP(m_big_lcp));
168  ar(CEREAL_NVP(m_big_lcp_idx));
169  }
170 
171  template <typename archive_t>
172  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
173  {
174  ar(CEREAL_NVP(m_small_lcp));
175  ar(CEREAL_NVP(m_big_lcp));
176  ar(CEREAL_NVP(m_big_lcp_idx));
177  }
178 
180  bool operator==(lcp_byte const & other) const noexcept
181  {
182  return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp) &&
183  (m_big_lcp_idx == other.m_big_lcp_idx);
184  }
185 
187  bool operator!=(lcp_byte const & other) const noexcept { return !(*this == other); }
188 };
189 
190 } // end namespace sdsl
191 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:788
bool empty() const noexcept
Equivalent to size() == 0.
Definition: int_vector.hpp:524
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:255
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:566
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
A class for a simple compressed version of LCP information.
Definition: lcp_byte.hpp:40
lcp_tag index_tag
Definition: lcp_byte.hpp:53
lcp_byte(cache_config &config)
Constructor.
Definition: lcp_byte.hpp:82
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: lcp_byte.hpp:127
lcp_byte(const lcp_byte &)=default
void load(std::istream &in)
Load from a stream.
Definition: lcp_byte.hpp:156
int_vector< t_width >::value_type value_type
Definition: lcp_byte.hpp:42
lcp_byte()=default
Default Constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: lcp_byte.hpp:172
const_reference reference
Definition: lcp_byte.hpp:46
int_vector ::size_type size_type
Definition: lcp_byte.hpp:49
bool operator==(lcp_byte const &other) const noexcept
Equality operator.
Definition: lcp_byte.hpp:180
random_access_const_iterator< lcp_byte > const_iterator
Definition: lcp_byte.hpp:43
value_type operator[](size_type i) const
[]-operator
Definition: lcp_byte.hpp:133
bool operator!=(lcp_byte const &other) const noexcept
Inequality operator.
Definition: lcp_byte.hpp:187
ptrdiff_t difference_type
Definition: lcp_byte.hpp:50
lcp_byte(lcp_byte &&)=default
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: lcp_byte.hpp:144
const_reference * pointer
Definition: lcp_byte.hpp:47
const value_type const_reference
Definition: lcp_byte.hpp:45
lcp_plain_tag lcp_category
Definition: lcp_byte.hpp:52
const_iterator iterator
Definition: lcp_byte.hpp:44
const pointer const_pointer
Definition: lcp_byte.hpp:48
lcp_byte & operator=(lcp_byte &&)=default
lcp_byte & operator=(const lcp_byte &)=default
size_type size() const
Number of elements in the instance.
Definition: lcp_byte.hpp:115
static size_type max_size()
Returns the largest size that lcp_byte can ever have.
Definition: lcp_byte.hpp:118
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: lcp_byte.hpp:124
bool empty() const
Returns if the data strucutre is empty.
Definition: lcp_byte.hpp:121
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: lcp_byte.hpp:164
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
lcp.hpp contains classes for lcp information.
constexpr char KEY_LCP[]
Definition: config.hpp:44
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
Helper class for construction process.
Definition: config.hpp:67