Alexandria  2.22.0
Please provide a description of the project.
AsciiReader.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2021 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
25 #include <boost/algorithm/string.hpp>
26 #include <fstream>
27 #include <set>
28 
29 #if BOOST_VERSION < 107300
30 #include <boost/io/detail/quoted_manip.hpp>
31 #else
32 #include <boost/io/quoted.hpp>
33 #endif
34 
37 #include "Table/AsciiReader.h"
38 
39 #include "AsciiReaderHelper.h"
40 #include "ReaderHelper.h"
41 
42 namespace Euclid {
43 namespace Table {
44 
45 AsciiReader::AsciiReader(std::istream& stream) : AsciiReader(InstOrRefHolder<std::istream>::create(stream)) {}
46 
47 AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {}
48 
50  : m_stream_holder(std::move(stream_holder)) {}
51 
53  if (m_reading_started) {
54  throw Elements::Exception() << "Changing comment indicator after reading "
55  << "has started is not allowed";
56  }
57  if (indicator.empty()) {
58  throw Elements::Exception() << "Empty string as comment indicator";
59  }
60  m_comment = indicator;
61  return *this;
62 }
63 
65  if (m_reading_started) {
66  throw Elements::Exception() << "Fixing the column names after reading "
67  << "has started is not allowed";
68  }
69 
70  m_column_names = std::move(column_names);
71 
73  static const regex::regex vertical_whitespace{".*[\\n\\v\\f\\r].*"}; // Checks if input contains any whitespace characters
74  for (const auto& name : m_column_names) {
75  if (name.empty()) {
76  throw Elements::Exception() << "Empty string column names are not allowed";
77  }
78  if (regex_match(name, vertical_whitespace)) {
79  throw Elements::Exception() << "Column name '" << name << "' contains "
80  << "vertical whitespace characters";
81  }
82  if (!set.insert(name).second) { // Check for duplicate names
83  throw Elements::Exception() << "Duplicate column name " << name;
84  }
85  }
87  throw Elements::Exception() << "Different number of column names and types";
88  }
89 
90  return *this;
91 }
92 
94  if (m_reading_started) {
95  throw Elements::Exception() << "Fixing the column types after reading "
96  << "has started is not allowed";
97  }
98 
99  m_column_types = std::move(column_types);
100 
102  throw Elements::Exception() << "Different number of column names and types";
103  }
104 
105  return *this;
106 }
107 
109  if (m_column_info != nullptr) {
110  return;
111  }
112  m_reading_started = true;
113 
114  auto& in = m_stream_holder->ref();
115 
116  size_t columns_number = countColumns(in, m_comment);
117  if (!m_column_names.empty() && m_column_names.size() != columns_number) {
118  throw Elements::Exception() << "Columns number in stream (" << columns_number
119  << ") does not match the column names number (" << m_column_names.size() << ")";
120  }
121  if (!m_column_types.empty() && m_column_types.size() != columns_number) {
122  throw Elements::Exception() << "Columns number in stream (" << columns_number
123  << ") does not match the column types number (" << m_column_types.size() << ")";
124  }
125 
126  auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
127  auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
128 
129  std::vector<std::string> names{};
131  std::vector<std::string> units{};
132  std::vector<std::string> descriptions{};
133  for (size_t i = 0; i < columns_number; ++i) {
134  if (m_column_names.empty()) {
135  names.emplace_back(auto_names[i]);
136  } else {
137  names.emplace_back(m_column_names[i]);
138  }
139  auto info = auto_desc.find(auto_names[i]);
140  if (info != auto_desc.end()) {
141  if (m_column_types.empty()) {
142  types.emplace_back(info->second.type);
143  } else {
144  types.emplace_back(m_column_types[i]);
145  }
146  units.emplace_back(info->second.unit);
147  descriptions.emplace_back(info->second.description);
148  } else {
149  if (m_column_types.empty()) {
150  types.emplace_back(typeid(std::string));
151  } else {
152  types.emplace_back(m_column_types[i]);
153  }
154  units.emplace_back("");
155  descriptions.emplace_back("");
156  }
157  }
158  m_column_info = createColumnInfo(names, types, units, descriptions);
159 }
160 
162  readColumnInfo();
163  return *m_column_info;
164 }
165 
167  std::string line;
168  auto pos = in.tellg();
169  getline(in, line);
170  in.seekg(pos);
171  return line;
172 }
173 
175  std::ostringstream comment;
176 
177  m_reading_started = true;
178  auto& in = m_stream_holder->ref();
179  while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
180  std::string line;
181  getline(in, line);
182  line = line.substr(m_comment.size());
183  boost::trim(line);
184  comment << line << '\n';
185  }
186 
187  auto full_comment = comment.str();
188  boost::trim(full_comment);
189  return full_comment;
190 }
191 
193  readColumnInfo();
194  auto& in = m_stream_holder->ref();
195 
196  std::vector<Row> row_list;
197  while (in && rows != 0) {
198  std::string line;
199  getline(in, line);
200  size_t comment_pos = line.find(m_comment);
201  if (comment_pos != std::string::npos) {
202  line = line.substr(0, comment_pos);
203  }
204  boost::trim(line);
205  if (!line.empty()) {
206  --rows;
207  std::stringstream line_stream(line);
208  size_t count{0};
210  std::string token;
211  line_stream >> token;
212  while (line_stream) {
213  if (count >= m_column_info->size()) {
214  throw Elements::Exception() << "Line with wrong number of cells: " << line;
215  }
216  values.push_back(convertToCellType(token, m_column_info->getDescription(count).type));
217  line_stream >> boost::io::quoted(token);
218  ++count;
219  }
220  row_list.push_back(Row{std::move(values), m_column_info});
221  }
222  }
223 
224  if (row_list.empty()) {
225  throw Elements::Exception() << "No more table rows left";
226  }
227  return Table{std::move(row_list)};
228 }
229 
230 void AsciiReader::skip(long rows) {
231  readColumnInfo();
232  auto& in = m_stream_holder->ref();
233 
234  while (in && rows != 0) {
235  std::string line;
236  getline(in, line);
237  size_t comment_pos = line.find(m_comment);
238  if (comment_pos != std::string::npos) {
239  line = line.substr(0, comment_pos);
240  }
241  boost::trim(line);
242  if (!line.empty()) {
243  --rows;
244  }
245  }
246 }
247 
249  return hasNextRow(m_stream_holder->ref(), m_comment);
250 }
251 
254 }
255 
256 } // namespace Table
257 } // namespace Euclid
TableReader implementation for reading ASCII tables from streams.
Definition: AsciiReader.h:87
void skip(long rows) override
Implements the TableReader::skip() contract.
std::vector< std::type_index > m_column_types
Definition: AsciiReader.h:230
std::string getComment() override
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
Definition: AsciiReader.cpp:64
std::vector< std::string > m_column_names
Definition: AsciiReader.h:231
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
Definition: AsciiReader.cpp:45
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
Definition: AsciiReader.cpp:93
Table readImpl(long rows) override
Reads the next rows into a Table.
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
std::shared_ptr< ColumnInfo > m_column_info
Definition: AsciiReader.h:232
const ColumnInfo & getInfo() override
Returns the column information of the table.
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
Definition: AsciiReader.h:227
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Definition: AsciiReader.cpp:52
Provides information about the columns of a Table.
Definition: ColumnInfo.h:52
Represents one row of a Table.
Definition: Row.h:64
Represents a table.
Definition: Table.h:49
T emplace_back(T... args)
T empty(T... args)
T find(T... args)
T move(T... args)
bool hasNextRow(std::istream &in, const std::string &comment)
static std::string _peekLine(std::istream &in)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
std::string quoted(const std::string &str)
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::type_index > &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
STL namespace.
T push_back(T... args)
T seekg(T... args)
T size(T... args)
T str(T... args)
T substr(T... args)
T tellg(T... args)