9 #ifndef INCLUDED_SDSL_WT_RLMN
10 #define INCLUDED_SDSL_WT_RLMN
29 template <
class t_alphabet_cat>
39 static std::map<uint64_t, uint64_t>
temp_C() {
return std::map<uint64_t, uint64_t>(); }
43 uint64_t max_symbol = (--C.end())->first;
90 template <
class t_bitvector = sd_vector<>,
91 class t_rank =
typename t_bitvector::rank_1_type,
92 class t_select =
typename t_bitvector::select_1_type,
93 class t_wt = wt_huff<>>
151 template <
typename t_it>
158 if (0 == m_size)
return;
166 for (
auto it =
begin; it !=
end; ++it, ++j)
169 if (last_c != c or it ==
begin)
177 condensed_wt.
close();
180 for (
size_type i = 0, prefix_sum = 0; i < m_C.size(); ++i)
190 for (
auto it =
begin; it !=
end; ++it, ++j)
193 if (bl[j]) { bf[lf_map[c]] = 1; }
205 util::init_support(m_bl_rank, &m_bl);
206 util::init_support(m_bf_rank, &m_bf);
207 util::init_support(m_bf_select, &m_bf);
208 util::init_support(m_bl_select, &m_bl);
211 for (
size_type i = 0; i < m_C.size(); ++i) { m_C_bf_rank[i] = m_bf_rank(m_C[i]); }
220 , m_bl_rank(wt.m_bl_rank)
221 , m_bf_rank(wt.m_bf_rank)
222 , m_bl_select(wt.m_bl_select)
223 , m_bf_select(wt.m_bf_select)
225 , m_C_bf_rank(wt.m_C_bf_rank)
227 m_bl_rank.set_vector(&m_bl);
228 m_bf_rank.set_vector(&m_bf);
229 m_bl_select.set_vector(&m_bl);
230 m_bf_select.set_vector(&m_bf);
236 , m_bl(std::move(wt.m_bl))
237 , m_bf(std::move(wt.m_bf))
238 , m_wt(std::move(wt.m_wt))
239 , m_bl_rank(std::move(wt.m_bl_rank))
240 , m_bf_rank(std::move(wt.m_bf_rank))
241 , m_bl_select(std::move(wt.m_bl_select))
242 , m_bf_select(std::move(wt.m_bf_select))
243 , m_C(std::move(wt.m_C))
244 , m_C_bf_rank(std::move(wt.m_C_bf_rank))
246 m_bl_rank.set_vector(&m_bl);
247 m_bf_rank.set_vector(&m_bf);
248 m_bl_select.set_vector(&m_bl);
249 m_bf_select.set_vector(&m_bf);
258 *
this = std::move(tmp);
268 m_size = std::move(wt.m_size);
269 m_bl = std::move(wt.m_bl);
270 m_bf = std::move(wt.m_bf);
271 m_wt = std::move(wt.m_wt);
272 m_bl_rank = std::move(wt.m_bl_rank);
273 m_bl_rank.set_vector(&m_bl);
274 m_bf_rank = std::move(wt.m_bf_rank);
275 m_bf_rank.set_vector(&m_bf);
276 m_bl_select = std::move(wt.m_bl_select);
277 m_bl_select.set_vector(&m_bl);
278 m_bf_select = std::move(wt.m_bf_select);
279 m_bf_select.set_vector(&m_bf);
280 m_C = std::move(wt.m_C);
281 m_C_bf_rank = std::move(wt.m_C_bf_rank);
290 bool empty()
const {
return 0 == m_size; }
302 return m_wt[m_bl_rank(i + 1) - 1];
317 if (i == 0)
return 0;
319 size_type c_runs = m_wt.rank(wt_ex_pos, c);
320 if (c_runs == 0)
return 0;
321 if (m_wt[wt_ex_pos - 1] == c)
323 size_type c_run_begin = m_bl_select(wt_ex_pos);
324 return m_bf_select(m_C_bf_rank[c] + c_runs) - m_C[c] + i - c_run_begin;
328 return m_bf_select(m_C_bf_rank[c] + c_runs + 1) - m_C[c];
342 if (i == 0) {
return std::make_pair(0, m_wt[0]); }
344 auto rc = m_wt.inverse_select(wt_ex_pos - 1);
347 if (c_runs == 0)
return std::make_pair(0, c);
348 if (m_wt[wt_ex_pos - 1] == c)
350 size_type c_run_begin = m_bl_select(wt_ex_pos);
351 return std::make_pair(m_bf_select(m_C_bf_rank[c] + c_runs) - m_C[c] + i - c_run_begin, c);
355 return std::make_pair(m_bf_select(m_C_bf_rank[c] + c_runs + 1) - m_C[c], c);
371 size_type c_runs = m_bf_rank(m_C[c] + i) - m_C_bf_rank[c];
372 size_type offset = m_C[c] + i - 1 - m_bf_select(c_runs + m_C_bf_rank[c]);
373 return m_bl_select(m_wt.select(c_runs, c) + 1) + offset;
387 written_bytes +=
write_member(m_size, out, child,
"size");
388 written_bytes += m_bl.serialize(out, child,
"bl");
389 written_bytes += m_bf.serialize(out, child,
"bf");
390 written_bytes += m_wt.serialize(out, child,
"wt");
391 written_bytes += m_bl_rank.serialize(out, child,
"bl_rank");
392 written_bytes += m_bf_rank.serialize(out, child,
"bf_rank");
393 written_bytes += m_bl_select.serialize(out, child,
"bl_select");
394 written_bytes += m_bf_select.serialize(out, child,
"bf_select");
395 written_bytes += m_C.serialize(out, child,
"C");
396 written_bytes += m_C_bf_rank.serialize(out, child,
"C_bf_rank");
398 return written_bytes;
408 m_bl_rank.load(in, &m_bl);
409 m_bf_rank.load(in, &m_bf);
410 m_bl_select.load(in, &m_bl);
411 m_bf_select.load(in, &m_bf);
413 m_C_bf_rank.load(in);
417 template <
typename archive_t>
433 template <
typename archive_t>
441 m_bl_rank.set_vector(&m_bl);
443 m_bf_rank.set_vector(&m_bf);
445 m_bl_select.set_vector(&m_bl);
447 m_bf_select.set_vector(&m_bf);
455 return (m_size == other.m_size) && (m_bl == other.m_bl) && (m_bf == other.m_bf) && (m_wt == other.m_wt) &&
456 (m_bl_rank == other.m_bl_rank) && (m_bf_rank == other.m_bf_rank) && (m_bl_select == other.m_bl_select) &&
457 (m_bf_select == other.m_bf_select) && (m_C == other.m_C) && (m_C_bf_rank == other.m_C_bf_rank);
void close(bool remove_file=false)
Close the int_vector_buffer.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
size_type size() const noexcept
The number of elements in the int_vector.
Generic iterator for a random access container.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A Wavelet Tree class for byte sequences.
t_bitvector bit_vector_type
wt_rlmn_trait< alphabet_category >::C_bf_rank_type C_bf_rank_type
t_wt::alphabet_category alphabet_category
random_access_const_iterator< wt_rlmn > const_iterator
wt_rlmn_trait< alphabet_category >::C_type C_type
bool operator==(wt_rlmn const &other) const noexcept
Equality operator.
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
t_select select_support_type
size_type size() const
Returns the size of the original vector.
t_wt::value_type value_type
void load(std::istream &in)
Loads the data structure from the given istream.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
bool empty() const
Returns whether the wavelet tree contains no data.
wt_rlmn(wt_rlmn &&wt)
Move constructor.
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
wt_rlmn(const wt_rlmn &wt)
Copy constructor.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
bool operator!=(wt_rlmn const &other) const noexcept
Inequality operator.
wt_rlmn(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
int_vector ::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
const_iterator begin() const
Returns a const_iterator to the first element.
wt_rlmn & operator=(const wt_rlmn &wt)
Assignment operator.
t_bitvector::difference_type difference_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
wt_rlmn & operator=(wt_rlmn &&wt)
Assignment move operator.
int_vector.hpp contains the sdsl::int_vector class.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
int remove(const std::string &)
Remove a file.
int_vector ::size_type size(const range_type &r)
Size of a range.
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
int_vector< 64 > C_bf_rank_type
static int_vector< 64 > temp_C()
static C_bf_rank_type init_C_bf_rank(const C_type &, uint64_t)
static C_type init_C(C_type &C, uint64_t)
static C_type init_C(std::map< uint64_t, uint64_t > &C, uint64_t size)
static C_bf_rank_type init_C_bf_rank(const C_type &C, uint64_t size)
int_vector C_bf_rank_type
static std::map< uint64_t, uint64_t > temp_C()
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.