SDSL  3.0.0
Succinct Data Structure Library
csa_alphabet_strategy.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
9 #ifndef INCLUDED_CSA_ALPHABET_STRATEGY
10 #define INCLUDED_CSA_ALPHABET_STRATEGY
11 
12 // TODO: Strategy with 1-to-1 mapping and C_array type as template parameter
13 // This can be also used for a integer based CSA.
14 
15 /* A alphabet strategy provides the following features:
16  * * Member `sigma` which contains the size (=number of unique symbols) of the alphabet.
17  * * Container `char2comp` which maps a symbol to a number [0..sigma-1]. The alphabetic
18  * order is preserved.
19  * * Container `comp2char` which is the inverse mapping of char2comp.
20  * * Container `C` contains the cumulative counts of occurrences. C[i] is the cumulative
21  * count of occurrences of symbols `comp2char[0]` to `comp2char[i-1]` in the text.
22  * C is of size `sigma+1`.
23  * * Typedefs for the four above members:
24  * * char2comp_type
25  * * comp2char_type
26  * * C_type
27  * * sigma_type
28  * * Constructor. Takes a int_vector_buffer<8> for byte-alphabets
29  * and int_vector_buffer<0> for integer-alphabets.
30  *
31  * \par Note
32  * sigma_type has to be large enough to represent the alphabet size 2*sigma,
33  * since there is code which will perform a binary search on array `C`.
34  */
35 
36 #include <string>
37 
38 #include <sdsl/config.hpp>
39 #include <sdsl/int_vector.hpp>
40 #include <sdsl/rank_support.hpp>
41 #include <sdsl/sd_vector.hpp>
42 #include <sdsl/sdsl_concepts.hpp>
43 #include <sdsl/select_support.hpp>
44 
45 namespace sdsl
46 {
47 
48 // forward declarations
49 
50 class byte_alphabet;
51 
52 template <class bit_vector_type = bit_vector,
53  class rank_support_type = rank_support_scan<>,
54  class select_support_type = select_support_scan<>,
55  class C_array_type = int_vector<>>
56 class succinct_byte_alphabet;
57 
58 template <class bit_vector_type = sd_vector<>,
59  class rank_support_type = typename bit_vector_type::rank_1_type,
60  class select_support_type = typename bit_vector_type::select_1_type,
61  class C_array_type = int_vector<>>
62 class int_alphabet;
63 
64 template <uint8_t int_width>
65 constexpr const char * key_text()
66 {
67  return conf::KEY_TEXT_INT;
68 }
69 
70 template <uint8_t int_width>
71 constexpr const char * key_bwt()
72 {
73  return conf::KEY_BWT_INT;
74 }
75 
76 template <>
77 inline constexpr const char * key_text<8>()
78 {
79  return conf::KEY_TEXT;
80 }
81 
82 template <>
83 inline constexpr const char * key_bwt<8>()
84 {
85  return conf::KEY_BWT;
86 }
87 
88 template <class t_alphabet_strategy>
90 {
92 };
93 
94 template <>
96 {
98 };
99 
100 // see
101 // http://stackoverflow.com/questions/13514587/c-check-for-nested-typedef-of-a-template-parameter-to-get-its-scalar-base-type
102 // for the next three functions
103 
104 template <class t_wt, class t_enable = void>
106 {
107  typedef t_enable type;
108 };
109 
110 template <class t_wt>
111 struct wt_alphabet_trait<t_wt, typename enable_if_type<typename t_wt::alphabet_category>::type>
112 {
114 };
115 
117 
125 {
126  public:
131  typedef uint16_t sigma_type;
132  typedef uint8_t char_type;
133  typedef uint8_t comp_char_type;
134  typedef std::string string_type;
135  enum
136  {
137  int_width = 8
138  };
139 
141 
144  const C_type & C;
145  const sigma_type & sigma;
146 
147  private:
148  char2comp_type m_char2comp; // Mapping from a character into the compact alphabet.
149  comp2char_type m_comp2char; // Inverse mapping of m_char2comp.
150  C_type m_C; // Cumulative counts for the compact alphabet [0..sigma].
151  sigma_type m_sigma; // Effective size of the alphabet.
152  public:
155  : char2comp(m_char2comp)
156  , comp2char(m_comp2char)
157  , C(m_C)
158  , sigma(m_sigma)
159  , m_sigma(0)
160  {}
161 
163 
168  : char2comp(m_char2comp)
169  , comp2char(m_comp2char)
170  , C(m_C)
171  , sigma(m_sigma)
172  {
173  m_sigma = 0;
174  if (0 == len or 0 == text_buf.size()) return;
175  assert(len <= text_buf.size());
176  // initialize vectors
177  m_C = int_vector<64>(257, 0);
178  m_char2comp = int_vector<8>(256, 0);
179  m_comp2char = int_vector<8>(256, 0);
180  // count occurrences of each symbol
181  for (size_type i = 0; i < len; ++i) { ++m_C[text_buf[i]]; }
182  assert(1 == m_C[0]); // null-byte should occur exactly once
183  m_sigma = 0;
184  for (int i = 0; i < 256; ++i)
185  if (m_C[i])
186  {
187  m_char2comp[i] = m_sigma;
188  m_comp2char[sigma] = i;
189  m_C[m_sigma] = m_C[i];
190  ++m_sigma;
191  }
192  m_comp2char.resize(m_sigma);
193  m_C.resize(m_sigma + 1);
194  for (int i = (int)m_sigma; i > 0; --i) m_C[i] = m_C[i - 1];
195  m_C[0] = 0;
196  for (int i = 1; i <= (int)m_sigma; ++i) m_C[i] += m_C[i - 1];
197  assert(C[sigma] == len);
198  }
199 
201  : char2comp(m_char2comp)
202  , comp2char(m_comp2char)
203  , C(m_C)
204  , sigma(m_sigma)
205  , m_char2comp(bas.m_char2comp)
206  , m_comp2char(bas.m_comp2char)
207  , m_C(bas.m_C)
208  , m_sigma(bas.m_sigma)
209  {}
210 
212  : char2comp(m_char2comp)
213  , comp2char(m_comp2char)
214  , C(m_C)
215  , sigma(m_sigma)
216  , m_char2comp(std::move(bas.m_char2comp))
217  , m_comp2char(std::move(bas.m_comp2char))
218  , m_C(std::move(bas.m_C))
219  , m_sigma(bas.m_sigma)
220  {}
221 
223  {
224  if (this != &bas)
225  {
226  byte_alphabet tmp(bas);
227  *this = std::move(tmp);
228  }
229  return *this;
230  }
231 
233  {
234  if (this != &bas)
235  {
236  m_char2comp = std::move(bas.m_char2comp);
237  m_comp2char = std::move(bas.m_comp2char);
238  m_C = std::move(bas.m_C);
239  m_sigma = std::move(bas.m_sigma);
240  }
241  return *this;
242  }
243 
244  size_type serialize(std::ostream & out, structure_tree_node * v, std::string name = "") const
245  {
246  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
247  size_type written_bytes = 0;
248  written_bytes += m_char2comp.serialize(out, child, "m_char2comp");
249  written_bytes += m_comp2char.serialize(out, child, "m_comp2char");
250  written_bytes += m_C.serialize(out, child, "m_C");
251  written_bytes += write_member(m_sigma, out, child, "m_sigma");
252  structure_tree::add_size(child, written_bytes);
253  return written_bytes;
254  }
255 
256  void load(std::istream & in)
257  {
258  m_char2comp.load(in);
259  m_comp2char.load(in);
260  m_C.load(in);
261  read_member(m_sigma, in);
262  }
263 
265  bool operator==(byte_alphabet const & other) const noexcept
266  {
267  return (m_char2comp == other.m_char2comp) && (m_comp2char == other.m_comp2char) && (m_C == other.m_C) &&
268  (m_sigma == other.m_sigma);
269  }
270 
272  bool operator!=(byte_alphabet const & other) const noexcept { return !(*this == other); }
273 
274  template <typename archive_t>
275  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
276  {
277  ar(CEREAL_NVP(m_char2comp));
278  ar(CEREAL_NVP(m_comp2char));
279  ar(CEREAL_NVP(m_C));
280  ar(CEREAL_NVP(m_sigma));
281  }
282 
283  template <typename archive_t>
284  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
285  {
286  ar(CEREAL_NVP(m_char2comp));
287  ar(CEREAL_NVP(m_comp2char));
288  ar(CEREAL_NVP(m_C));
289  ar(CEREAL_NVP(m_sigma));
290  }
291 };
292 
294 
303 template <class bit_vector_type, class rank_support_type, class select_support_type, class C_array_type>
305 {
306  public:
307  class char2comp_wrapper;
308  class comp2char_wrapper;
309  friend class char2comp_wrapper;
310  friend class comp2char_wrapper;
311 
315  typedef C_array_type C_type;
316  typedef uint16_t sigma_type;
317  typedef uint8_t char_type;
318  typedef uint8_t comp_char_type;
319  typedef std::string string_type;
321  enum
322  {
323  int_width = 8
324  };
325 
328  {
329  private:
330  const succinct_byte_alphabet * m_strat;
331 
332  public:
334  : m_strat(strat)
335  {}
337  {
338  if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0;
339  return (comp_char_type)m_strat->m_char_rank((size_type)c);
340  }
341  };
342 
345  {
346  private:
347  const succinct_byte_alphabet * m_strat;
348 
349  public:
351  : m_strat(strat)
352  {}
353  char_type operator[](comp_char_type c) const { return (char_type)m_strat->m_char_select(((size_type)c) + 1); }
354  };
355 
358  const C_type & C;
359  const sigma_type & sigma;
360 
361  private:
362  bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not
363  rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp
364  select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
365  C_type m_C; // cumulative counts for the compact alphabet [0..sigma]
366  sigma_type m_sigma; // effective size of the alphabet
367 
368  public:
371  : char2comp(this)
372  , comp2char(this)
373  , C(m_C)
374  , sigma(m_sigma)
375  , m_sigma(0)
376  {}
377 
379 
384  : char2comp(this)
385  , comp2char(this)
386  , C(m_C)
387  , sigma(m_sigma)
388  {
389  m_sigma = 0;
390  if (0 == len or 0 == text_buf.size()) return;
391  assert(len <= text_buf.size());
392  // initialize vectors
393  int_vector<64> D(257, 0);
394  bit_vector tmp_char(256, 0);
395  // count occurrences of each symbol
396  for (size_type i = 0; i < len; ++i) { ++D[text_buf[i]]; }
397  assert(1 == D[0]); // null-byte should occur exactly once
398  m_sigma = 0;
399  for (int i = 0; i < 256; ++i)
400  if (D[i])
401  {
402  tmp_char[i] = 1; // mark occurring character
403  D[m_sigma] = D[i]; // compactify m_C
404  ++m_sigma;
405  }
406  // resize to sigma+1, since CSAs also need the sum of all elements
407  m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1);
408 
409  for (int i = (int)m_sigma; i > 0; --i) m_C[i] = D[i - 1];
410  m_C[0] = 0;
411  for (int i = 1; i <= (int)m_sigma; ++i) m_C[i] = m_C[i] + m_C[i - 1];
412  assert(m_C[sigma] == len);
413  m_char = tmp_char;
414  util::init_support(m_char_rank, &m_char);
415  util::init_support(m_char_select, &m_char);
416  }
417 
420  : char2comp(this)
421  , comp2char(this)
422  , C(m_C)
423  , sigma(m_sigma)
424  , m_char(strat.m_char)
425  , m_char_rank(strat.m_char_rank)
426  , m_char_select(strat.m_char_select)
427  , m_C(strat.m_C)
428  , m_sigma(strat.m_sigma)
429  {
430  m_char_rank.set_vector(&m_char);
431  m_char_select.set_vector(&m_char);
432  }
433 
436  : char2comp(this)
437  , comp2char(this)
438  , C(m_C)
439  , sigma(m_sigma)
440  , m_char(std::move(strat.m_char))
441  , m_char_rank(std::move(strat.m_char_rank))
442  , m_char_select(std::move(strat.m_char_select))
443  , m_C(std::move(strat.m_C))
444  , m_sigma(std::move(strat.m_sigma))
445  {
446  m_char_rank.set_vector(&m_char);
447  m_char_select.set_vector(&m_char);
448  }
449 
451  {
452  if (this != &strat)
453  {
454  succinct_byte_alphabet tmp(strat);
455  *this = std::move(tmp);
456  }
457  return *this;
458  }
459 
461  {
462  if (this != &strat)
463  {
464  m_char = std::move(strat.m_char);
465  m_char_rank = std::move(strat.m_char_rank);
466  m_char_rank.set_vector(&m_char);
467  m_char_select = std::move(strat.m_char_select);
468  m_char_select.set_vector(&m_char);
469  m_C = std::move(strat.m_C);
470  m_sigma = std::move(strat.m_sigma);
471  }
472  return *this;
473  }
474 
476  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
477  {
478  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
479  size_type written_bytes = 0;
480  written_bytes += m_char.serialize(out, child, "m_char");
481  written_bytes += m_char_rank.serialize(out, child, "m_char_rank");
482  written_bytes += m_char_select.serialize(out, child, "m_char_select");
483  written_bytes += m_C.serialize(out, child, "m_C");
484  written_bytes += write_member(m_sigma, out, child, "m_sigma");
485  structure_tree::add_size(child, written_bytes);
486  return written_bytes;
487  }
488 
490  void load(std::istream & in)
491  {
492  m_char.load(in);
493  m_char_rank.load(in);
494  m_char_rank.set_vector(&m_char);
495  m_char_select.load(in);
496  m_char_select.set_vector(&m_char);
497  m_C.load(in);
498  read_member(m_sigma, in);
499  }
500 
502  bool operator==(succinct_byte_alphabet const & other) const noexcept
503  {
504  return (m_char == other.m_char) && (m_char_rank == other.m_char_rank) &&
505  (m_char_select == other.m_char_select) && (m_C == other.m_C) && (m_sigma == other.m_sigma);
506  }
507 
509  bool operator!=(succinct_byte_alphabet const & other) const noexcept { return !(*this == other); }
510 
511  template <typename archive_t>
512  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
513  {
514  ar(CEREAL_NVP(m_char));
515  ar(CEREAL_NVP(m_char_rank));
516  ar(CEREAL_NVP(m_char_select));
517  ar(CEREAL_NVP(m_C));
518  ar(CEREAL_NVP(m_sigma));
519  }
520 
521  template <typename archive_t>
522  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
523  {
524  ar(CEREAL_NVP(m_char));
525  ar(CEREAL_NVP(m_char_rank));
526  m_char_rank.set_vector(&m_char);
527  ar(CEREAL_NVP(m_char_select));
528  m_char_select.set_vector(&m_char);
529  ar(CEREAL_NVP(m_C));
530  ar(CEREAL_NVP(m_sigma));
531  }
532 };
533 
534 template <typename bit_vector_type, typename size_type>
535 void init_char_bitvector(bit_vector_type & char_bv, const std::map<size_type, size_type> & D)
536 {
537  // note: the alphabet has at least size 1, so the following is safe:
538  auto largest_symbol = (--D.end())->first;
539  bit_vector tmp_char(largest_symbol + 1, 0);
540  for (const auto & x : D) { tmp_char[x.first] = 1; }
541  char_bv = tmp_char;
542 }
543 
544 template <typename t_hi_bit_vector, typename t_select_1, typename t_select_0, typename size_type>
546  const std::map<size_type, size_type> & D)
547 {
548  auto largest_symbol = (--D.end())->first;
549  sd_vector_builder builder(largest_symbol + 1, D.size());
550  for (const auto & x : D) { builder.set(x.first); }
551  char_bv = std::move(sd_vector<t_hi_bit_vector, t_select_1, t_select_0>(builder));
552 }
553 
560 {
561  public:
563  class mapping_wrapper;
564 
569  typedef uint16_t sigma_type;
570  typedef uint8_t char_type;
571  typedef uint8_t comp_char_type;
572  typedef std::string string_type;
574  enum
575  {
576  int_width = 8
577  };
578 
581  {
582  public:
584  mapping_wrapper() = default;
585 
587  constexpr char_type operator[](char_type const c) const noexcept { return c; }
588  };
589 
592  const C_type & C;
593  const sigma_type & sigma;
594 
595  private:
596  C_type m_C; // Cumulative counts for the compact alphabet [0..sigma].
597  sigma_type m_sigma; // Effective size of the alphabet.
598 
599  public:
602  : C(m_C)
603  , sigma(m_sigma)
604  , m_sigma(0)
605  {}
606 
612  : C(m_C)
613  , sigma(m_sigma)
614  {
615  m_sigma = 0;
616  if (0 == len || 0 == text_buf.size()) return;
617 
618  assert(len <= text_buf.size());
619 
620  // initialize vectors
621  m_C = int_vector<64>(257, 0);
622  // count occurrences of each symbol
623  for (size_type i = 0; i < len; ++i) ++m_C[text_buf[i]];
624 
625  assert(1 == m_C[0]); // null-byte should occur exactly once
626 
627  m_sigma = 255;
628  for (int i = 0; i < 256; ++i)
629  {
630  if (m_C[i])
631  {
632  m_sigma = i + 1;
633  // m_C[m_sigma] = m_C[i];
634  // ++m_sigma;
635  }
636  }
637  // m_C.resize(m_sigma + 1);
638  for (int i = (int)256; i > 0; --i) m_C[i] = m_C[i - 1];
639  m_C[0] = 0;
640  for (int i = 1; i <= (int)256; ++i) m_C[i] += m_C[i - 1];
641 
642  assert(C[sigma] == len);
643  }
644 
647  : C(m_C)
648  , sigma(m_sigma)
649  , m_C(strat.m_C)
650  , m_sigma(strat.m_sigma)
651  {}
652 
655  : C(m_C)
656  , sigma(m_sigma)
657  , m_C(std::move(strat.m_C))
658  , m_sigma(strat.m_sigma)
659  {}
660 
663  {
664  if (this != &strat)
665  {
666  plain_byte_alphabet tmp(strat);
667  *this = std::move(tmp);
668  }
669  return *this;
670  }
671 
674  {
675  if (this != &strat)
676  {
677  m_C = std::move(strat.m_C);
678  m_sigma = strat.m_sigma;
679  }
680  return *this;
681  }
682 
684  size_type serialize(std::ostream & out, structure_tree_node * v, std::string const & name = "") const
685  {
686  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
687  size_type written_bytes = 0;
688  written_bytes += m_C.serialize(out, child, "m_C");
689  written_bytes += write_member(m_sigma, out, child, "m_sigma");
690  structure_tree::add_size(child, written_bytes);
691  return written_bytes;
692  }
693 
694  void load(std::istream & in)
695  {
696  m_C.load(in);
697  read_member(m_sigma, in);
698  }
699 
700  template <typename archive_t>
701  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
702  {
703  ar(CEREAL_NVP(m_C));
704  ar(CEREAL_NVP(m_sigma));
705  }
706 
707  template <typename archive_t>
708  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
709  {
710  ar(CEREAL_NVP(m_C));
711  ar(CEREAL_NVP(m_sigma));
712  }
713 
714  bool operator==(plain_byte_alphabet const & other) const noexcept
715  {
716  return (m_C == other.m_C) && (m_sigma == other.m_sigma);
717  }
718 
719  bool operator!=(plain_byte_alphabet const & other) const noexcept { return !(*this == other); }
721 };
722 
724 
734 template <class bit_vector_type, class rank_support_type, class select_support_type, class C_array_type>
736 {
737  public:
738  class char2comp_wrapper;
739  class comp2char_wrapper;
740  friend class char2comp_wrapper;
741  friend class comp2char_wrapper;
742 
746  typedef C_array_type C_type;
747  typedef uint64_t sigma_type;
748  typedef uint64_t char_type;
749  typedef uint64_t comp_char_type;
750  typedef std::vector<char_type> string_type;
752  enum
753  {
754  int_width = 0
755  };
756 
759  {
760  private:
761  const int_alphabet * m_strat;
762 
763  public:
765  : m_strat(strat)
766  {}
768  {
769  if (m_strat->m_char.size() > 0)
770  { // if alphabet is not continuous
771  if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0;
772  return (comp_char_type)m_strat->m_char_rank((size_type)c);
773  }
774  else
775  { // direct map if it is continuous
776  if (c >= m_strat->m_sigma) return 0;
777  return (comp_char_type)c;
778  }
779  return 0;
780  }
781  };
782 
785  {
786  private:
787  const int_alphabet * m_strat;
788 
789  public:
791  : m_strat(strat)
792  {}
794  {
795  if (m_strat->m_char.size() > 0)
796  { // if alphabet is not continuous
797  return (char_type)m_strat->m_char_select(((size_type)c) + 1);
798  }
799  else
800  { // direct map if it is continuous
801  return (char_type)c;
802  }
803  }
804  };
805 
808  const C_type & C;
809  const sigma_type & sigma;
810 
811  private:
812  bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not
813  rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp
814  select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
815  C_type m_C; // cumulative counts for the compact alphabet [0..sigma]
816  sigma_type m_sigma; // effective size of the alphabet
817 
819  bool is_continuous_alphabet(std::map<size_type, size_type> & D)
820  {
821  if (D.size() == 0)
822  { // an empty alphabet is continuous
823  return true;
824  }
825  else
826  {
827  // max key + 1 == size of map
828  return ((--D.end())->first + 1) == D.size();
829  }
830  }
831 
832  public:
835  : char2comp(this)
836  , comp2char(this)
837  , C(m_C)
838  , sigma(m_sigma)
839  , m_sigma(0)
840  {}
841 
843 
848  : char2comp(this)
849  , comp2char(this)
850  , C(m_C)
851  , sigma(m_sigma)
852  {
853  m_sigma = 0;
854  if (0 == len or 0 == text_buf.size()) return;
855  assert(len <= text_buf.size());
856  // initialize vectors
857  std::map<size_type, size_type> D;
858  // count occurrences of each symbol
859  for (size_type i = 0; i < len; ++i) { D[text_buf[i]]++; }
860  m_sigma = D.size();
861  if (is_continuous_alphabet(D))
862  {
863  // do not initialize m_char, m_char_rank and m_char_select since we can map directly
864  }
865  else
866  {
867  init_char_bitvector(m_char, D);
868  }
869  assert(D.find(0) != D.end() and 1 == D[0]); // null-byte should occur exactly once
870 
871  // resize to sigma+1, since CSAs also need the sum of all elements
872  m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1);
873  size_type sum = 0, idx = 0;
874  for (std::map<size_type, size_type>::const_iterator it = D.begin(), end = D.end(); it != end; ++it)
875  {
876  m_C[idx++] = sum;
877  sum += it->second;
878  }
879  m_C[idx] = sum; // insert sum of all elements
880  }
881 
883  int_alphabet(const int_alphabet & strat)
884  : char2comp(this)
885  , comp2char(this)
886  , C(m_C)
887  , sigma(m_sigma)
888  , m_char(strat.m_char)
889  , m_char_rank(strat.m_char_rank)
890  , m_char_select(strat.m_char_select)
891  , m_C(strat.m_C)
892  , m_sigma(strat.m_sigma)
893  {
894  m_char_rank.set_vector(&m_char);
895  m_char_select.set_vector(&m_char);
896  }
897 
900  : char2comp(this)
901  , comp2char(this)
902  , C(m_C)
903  , sigma(m_sigma)
904  , m_char(std::move(strat.m_char))
905  , m_char_rank(std::move(strat.m_char_rank))
906  , m_char_select(std::move(strat.m_char_select))
907  , m_C(std::move(strat.m_C))
908  , m_sigma(std::move(strat.m_sigma))
909  {
910  m_char_rank.set_vector(&m_char);
911  m_char_select.set_vector(&m_char);
912  }
913 
915  {
916  if (this != &strat)
917  {
918  int_alphabet tmp(strat);
919  *this = std::move(tmp);
920  }
921  return *this;
922  }
923 
925  {
926  if (this != &strat)
927  {
928  m_char = std::move(strat.m_char);
929  m_char_rank = std::move(strat.m_char_rank);
930  m_char_rank.set_vector(&m_char);
931  m_char_select = std::move(strat.m_char_select);
932  m_char_select.set_vector(&m_char);
933  m_C = std::move(strat.m_C);
934  m_sigma = std::move(strat.m_sigma);
935  }
936  return *this;
937  }
938 
940  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
941  {
942  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
943  size_type written_bytes = 0;
944  written_bytes += m_char.serialize(out, child, "m_char");
945  written_bytes += m_char_rank.serialize(out, child, "m_char_rank");
946  written_bytes += m_char_select.serialize(out, child, "m_char_select");
947  written_bytes += m_C.serialize(out, child, "m_C");
948  written_bytes += write_member(m_sigma, out, child, "m_sigma");
949  structure_tree::add_size(child, written_bytes);
950  return written_bytes;
951  }
952 
954  void load(std::istream & in)
955  {
956  m_char.load(in);
957  m_char_rank.load(in);
958  m_char_rank.set_vector(&m_char);
959  m_char_select.load(in);
960  m_char_select.set_vector(&m_char);
961  m_C.load(in);
962  read_member(m_sigma, in);
963  }
964 
966  bool operator==(int_alphabet const & other) const noexcept
967  {
968  return (m_char == other.m_char) && (m_char_rank == other.m_char_rank) &&
969  (m_char_select == other.m_char_select) && (m_C == other.m_C) && (m_sigma == other.m_sigma);
970  }
971 
973  bool operator!=(int_alphabet const & other) const noexcept { return !(*this == other); }
974 
975  template <typename archive_t>
976  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
977  {
978  ar(CEREAL_NVP(m_char));
979  ar(CEREAL_NVP(m_char_rank));
980  ar(CEREAL_NVP(m_char_select));
981  ar(CEREAL_NVP(m_C));
982  ar(CEREAL_NVP(m_sigma));
983  }
984 
985  template <typename archive_t>
986  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
987  {
988  ar(CEREAL_NVP(m_char));
989  ar(CEREAL_NVP(m_char_rank));
990  m_char_rank.set_vector(&m_char);
991  ar(CEREAL_NVP(m_char_select));
992  m_char_select.set_vector(&m_char);
993  ar(CEREAL_NVP(m_C));
994  ar(CEREAL_NVP(m_sigma));
995  }
996 };
997 
998 } // end namespace sdsl
999 
1000 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
#define CEREAL_LOAD_FUNCTION_NAME
Definition: cereal.hpp:33
#define CEREAL_SAVE_FUNCTION_NAME
Definition: cereal.hpp:34
A simple space greedy representation for byte alphabets.
byte_alphabet_tag alphabet_category
bool operator!=(byte_alphabet const &other) const noexcept
Inequality operator.
bool operator==(byte_alphabet const &other) const noexcept
Equality operator.
byte_alphabet & operator=(byte_alphabet &&bas)
byte_alphabet & operator=(const byte_alphabet &bas)
const char2comp_type & char2comp
void load(std::istream &in)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
byte_alphabet(int_vector_buffer< 8 > &text_buf, int_vector_size_type len)
Construct from a byte-stream.
byte_alphabet()
Default constructor.
int_vector ::size_type size_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
byte_alphabet(const byte_alphabet &bas)
size_type serialize(std::ostream &out, structure_tree_node *v, std::string name="") const
byte_alphabet(byte_alphabet &&bas)
const comp2char_type & comp2char
Helper class for the char2comp mapping.
comp_char_type operator[](char_type c) const
Helper class for the comp2char mapping.
char_type operator[](comp_char_type c) const
A space-efficient representation for byte alphabets.
char2comp_wrapper char2comp_type
comp2char_wrapper comp2char_type
int_alphabet & operator=(const int_alphabet &strat)
int_alphabet(int_vector_buffer< 0 > &text_buf, int_vector_size_type len)
Construct from a byte-stream.
bool operator==(int_alphabet const &other) const noexcept
Equality operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
int_alphabet(int_alphabet &&strat)
Copy constructor.
int_alphabet_tag alphabet_category
int_vector ::size_type size_type
std::vector< char_type > string_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize method.
int_alphabet()
Default constructor.
const sigma_type & sigma
void load(std::istream &in)
Load method.
int_alphabet(const int_alphabet &strat)
Copy constructor.
const comp2char_type comp2char
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
const char2comp_type char2comp
bool operator!=(int_alphabet const &other) const noexcept
Inequality operator.
int_alphabet & operator=(int_alphabet &&strat)
uint64_t size() const
Returns the number of elements currently stored.
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
Helper class for the char2comp and comp2char mapping.
mapping_wrapper()=default
Default constructor.
constexpr char_type operator[](char_type const c) const noexcept
Random access operator.
Provides an alphabet mapping that implements an identity map (i.e.
int_vector ::size_type size_type
plain_byte_alphabet & operator=(plain_byte_alphabet const &strat)
Copy assignment.
plain_byte_alphabet(plain_byte_alphabet const &strat)
Copy constructor.
plain_byte_alphabet & operator=(plain_byte_alphabet &&strat) noexcept
Move assignment.
plain_byte_alphabet(plain_byte_alphabet &&strat) noexcept
Move constructor.
plain_byte_alphabet()
Default constructor.
plain_byte_alphabet(int_vector_buffer< 8 > &text_buf, int_vector_size_type len)
Construct from a byte-stream.
Class for in-place construction of sd_vector from a strictly increasing sequence.
Definition: sd_vector.hpp:43
void set(size_type i)
Set a bit to 1.
Definition: sd_vector.hpp:77
A bit vector which compresses very sparse populated bit vectors by.
Definition: sd_vector.hpp:115
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
Helper class for the char2comp mapping.
char2comp_wrapper(const succinct_byte_alphabet *strat)
Helper class for the comp2char mapping.
comp2char_wrapper(const succinct_byte_alphabet *strat)
A space-efficient representation for byte alphabets.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
succinct_byte_alphabet(succinct_byte_alphabet &&strat)
Move constructor.
bool operator==(succinct_byte_alphabet const &other) const noexcept
Equality operator.
succinct_byte_alphabet()
Default constructor.
bool operator!=(succinct_byte_alphabet const &other) const noexcept
Inequality operator.
void load(std::istream &in)
Load method.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
succinct_byte_alphabet(int_vector_buffer< 8 > &text_buf, int_vector_size_type len)
Construct from a byte-stream.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize method.
succinct_byte_alphabet & operator=(succinct_byte_alphabet &&strat)
succinct_byte_alphabet(const succinct_byte_alphabet &strat)
Copy constructor.
succinct_byte_alphabet & operator=(const succinct_byte_alphabet &strat)
int_vector.hpp contains the sdsl::int_vector class.
constexpr char KEY_BWT_INT[]
Definition: config.hpp:36
constexpr char KEY_TEXT[]
Definition: config.hpp:41
constexpr char KEY_TEXT_INT[]
Definition: config.hpp:42
constexpr char KEY_BWT[]
Definition: config.hpp:35
Namespace for the succinct data structure library.
std::enable_if< has_serialize< X >::value, typename X::size_type >::type serialize(const X &x, std::ostream &out, structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:131
void init_char_bitvector(bit_vector_type &char_bv, const std::map< size_type, size_type > &D)
std::enable_if< has_load< X >::value, void >::type load(X &x, std::istream &in)
Definition: io.hpp:154
bool operator==(const track_allocator< T > &, const track_allocator< U > &)
bool operator!=(const track_allocator< T > &a, const track_allocator< U > &b)
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
constexpr const char * key_bwt< 8 >()
uint64_t int_vector_size_type
Definition: config.hpp:48
constexpr const char * key_bwt()
constexpr const char * key_text()
constexpr const char * key_text< 8 >()
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661