Alexandria  2.27.0
SDC-CH common library for the Euclid project
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AsciiReader.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2022 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
25 #include <boost/algorithm/string.hpp>
26 #include <fstream>
27 #include <set>
28 
29 #if BOOST_VERSION < 107300
30 #include <boost/io/detail/quoted_manip.hpp>
31 #else
32 #include <boost/io/quoted.hpp>
33 #endif
34 
37 #include "Table/AsciiReader.h"
38 
39 #include "AsciiReaderHelper.h"
40 #include "ReaderHelper.h"
41 
42 namespace Euclid {
43 namespace Table {
44 
46  std::string line;
47  auto pos = in.tellg();
48  getline(in, line);
49  in.seekg(pos);
50  return line;
51 }
52 
53 AsciiReader::AsciiReader(std::istream& stream) : AsciiReader(InstOrRefHolder<std::istream>::create(stream)) {}
54 
55 AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {}
56 
58  : m_stream_holder(std::move(stream_holder)) {}
59 
61  if (m_reading_started) {
62  throw Elements::Exception() << "Changing comment indicator after reading "
63  << "has started is not allowed";
64  }
65  if (indicator.empty()) {
66  throw Elements::Exception() << "Empty string as comment indicator";
67  }
68  m_comment = indicator;
69  return *this;
70 }
71 
73  if (m_reading_started) {
74  throw Elements::Exception() << "Fixing the column names after reading "
75  << "has started is not allowed";
76  }
77 
78  m_column_names = std::move(column_names);
79 
81  static const regex::regex vertical_whitespace{".*[\\n\\v\\f\\r].*"}; // Matches names containing any vertical
82  // whitespace character (newline, vertical tab, form feed or carriage return)
83  for (const auto& name : m_column_names) {
84  if (name.empty()) {
85  throw Elements::Exception() << "Empty string column names are not allowed";
86  }
87  if (regex_match(name, vertical_whitespace)) {
88  throw Elements::Exception() << "Column name '" << name << "' contains "
89  << "vertical whitespace characters";
90  }
91  if (!set.insert(name).second) { // Check for duplicate names
92  throw Elements::Exception() << "Duplicate column name " << name;
93  }
94  }
95  if (!m_column_names.empty() && !m_column_types.empty() && m_column_names.size() != m_column_types.size()) {
96  throw Elements::Exception() << "Different number of column names and types";
97  }
98 
99  return *this;
100 }
101 
103  if (m_reading_started) {
104  throw Elements::Exception() << "Fixing the column types after reading "
105  << "has started is not allowed";
106  }
107 
108  std::transform(column_types.begin(), column_types.end(), std::back_inserter(m_column_types),
109  [](std::type_index type) { return std::make_pair(type, std::size_t(0)); });
110 
112  throw Elements::Exception() << "Different number of column names and types";
113  }
114 
115  return *this;
116 }
117 
119  if (m_reading_started) {
120  throw Elements::Exception() << "Fixing the column types after reading "
121  << "has started is not allowed";
122  }
123 
124  m_column_types = std::move(column_types);
125 
127  throw Elements::Exception() << "Different number of column names and types";
128  }
129 
130  return *this;
131 }
132 
134  if (m_column_info != nullptr) {
135  return;
136  }
137  m_reading_started = true;
138 
139  auto& in = m_stream_holder->ref();
140 
141  size_t columns_number = countColumns(in, m_comment);
142  if (!m_column_names.empty() && m_column_names.size() != columns_number) {
143  throw Elements::Exception() << "Columns number in stream (" << columns_number
144  << ") does not match the column names number (" << m_column_names.size() << ")";
145  }
146  if (!m_column_types.empty() && m_column_types.size() != columns_number) {
147  throw Elements::Exception() << "Columns number in stream (" << columns_number
148  << ") does not match the column types number (" << m_column_types.size() << ")";
149  }
150 
151  auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
152  auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
153 
157  std::vector<std::string> descriptions;
158  auto first_line = firstDataLine(in, m_comment);
159 
160  for (size_t i = 0; i < columns_number; ++i) {
161  if (m_column_names.empty()) {
162  names.emplace_back(auto_names[i]);
163  } else {
164  names.emplace_back(m_column_names[i]);
165  }
166  auto info = auto_desc.find(auto_names[i]);
167  if (info != auto_desc.end()) {
168  if (m_column_types.empty()) {
169  types.emplace_back(info->second.type, info->second.size);
170  } else {
171  types.emplace_back(m_column_types[i]);
172  }
173  units.emplace_back(info->second.unit);
174  descriptions.emplace_back(info->second.description);
175  } else {
176  if (!m_column_types.empty()) {
177  types.emplace_back(m_column_types[i]);
178  } else if (i < first_line.size()) {
179  types.emplace_back(guessColumnType(first_line[i]));
180  } else {
181  types.emplace_back(typeid(std::string), 0);
182  }
183  units.emplace_back("");
184  descriptions.emplace_back("");
185  }
186  }
187  m_column_info = createColumnInfo(names, types, units, descriptions);
188 }
189 
191  readColumnInfo();
192  return *m_column_info;
193 }
194 
196  std::ostringstream comment;
197 
198  m_reading_started = true;
199  auto& in = m_stream_holder->ref();
200  while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
201  std::string line;
202  getline(in, line);
203  line = line.substr(m_comment.size());
204  boost::trim(line);
205  comment << line << '\n';
206  }
207 
208  auto full_comment = comment.str();
209  boost::trim(full_comment);
210  return full_comment;
211 }
212 
214  readColumnInfo();
215  auto& in = m_stream_holder->ref();
216 
217  std::vector<Row> row_list;
218  while (in && rows != 0) {
219  std::string line;
220  getline(in, line);
221  auto tokens = splitLine(line, m_comment);
222  if (tokens.empty()) {
223  continue;
224  }
225  if (tokens.size() != m_column_info->size()) {
226  throw Elements::Exception() << "Line with wrong number of cells: " << line;
227  }
228 
230  values.reserve(tokens.size());
231  std::size_t index = 0;
232  std::transform(tokens.begin(), tokens.end(), std::back_inserter(values), [this, &index](const std::string& token) {
233  return convertToCellType(token, m_column_info->getDescription(index++).type);
234  });
235  row_list.push_back(Row{std::move(values), m_column_info});
236  }
237 
238  if (row_list.empty()) {
239  throw Elements::Exception() << "No more table rows left";
240  }
241  return Table{std::move(row_list)};
242 }
243 
244 void AsciiReader::skip(long rows) {
245  readColumnInfo();
246  auto& in = m_stream_holder->ref();
247 
248  while (in && rows != 0) {
249  std::string line;
250  getline(in, line);
251  size_t comment_pos = line.find(m_comment);
252  if (comment_pos != std::string::npos) {
253  line = line.substr(0, comment_pos);
254  }
255  boost::trim(line);
256  if (!line.empty()) {
257  --rows;
258  }
259  }
260 }
261 
263  return hasNextRow(m_stream_holder->ref(), m_comment);
264 }
265 
268 }
269 
270 } // namespace Table
271 } // namespace Euclid
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Definition: AsciiReader.cpp:60
T empty(T...args)
std::shared_ptr< ColumnInfo > m_column_info
Definition: AsciiReader.h:234
std::vector< std::string > splitLine(std::string line, const std::string &comment)
T end(T...args)
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
STL class.
const ColumnInfo & getInfo() override
Returns the column information of the table.
T seekg(T...args)
STL class.
T push_back(T...args)
std::vector< std::pair< std::type_index, std::size_t > > m_column_types
Definition: AsciiReader.h:232
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
Definition: AsciiReader.h:229
Table readImpl(long rows) override
Reads the next rows into a Table.
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
Definition: AsciiReader.cpp:53
TableReader implementation for reading ASCII tables from streams.
Definition: AsciiReader.h:87
T make_pair(T...args)
std::pair< std::type_index, std::size_t > guessColumnType(const std::string &token)
T move(T...args)
T tellg(T...args)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
Represents one row of a Table.
Definition: Row.h:57
Represents a table.
Definition: Table.h:49
bool hasNextRow(std::istream &in, const std::string &comment)
T find(T...args)
T size(T...args)
STL class.
void skip(long rows) override
Implements the TableReader::skip() contract.
Provides information about the columns of a Table.
Definition: ColumnInfo.h:52
STL class.
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
std::vector< std::string > m_column_names
Definition: AsciiReader.h:233
T begin(T...args)
T back_inserter(T...args)
std::string getComment() override
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::pair< std::type_index, std::size_t >> &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
T substr(T...args)
static std::string _peekLine(std::istream &in)
Definition: AsciiReader.cpp:45
T transform(T...args)
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
std::vector< std::string > firstDataLine(std::istream &in, const std::string &comment)
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
Definition: AsciiReader.cpp:72
T reserve(T...args)
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
T emplace_back(T...args)